* [PATCH v24 01/15] common/symbols: Export hypervisor symbols to privileged guest
2015-06-10 15:04 [PATCH v24 00/15] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
@ 2015-06-10 15:04 ` Boris Ostrovsky
2015-06-10 15:04 ` [PATCH v24 02/15] x86/VPMU: Add public xenpmu.h Boris Ostrovsky
` (13 subsequent siblings)
14 siblings, 0 replies; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-10 15:04 UTC
To: JBeulich, kevin.tian, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: boris.ostrovsky, tim, jun.nakajima, xen-devel
Export Xen's symbols as {<address><type><name>} triplets via the new
XENPF_get_symbol hypercall.
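As an illustration only (not part of the patch), a privileged guest would
invoke the hypercall in a loop until the returned symnum stops advancing;
per the interface below, an OUT symnum equal to the IN value signals the end
of the table. The HYPERVISOR_platform_op() wrapper, printk() and the buffer
size are assumed guest-side details:

    static void dump_xen_symbols(void)
    {
        struct xen_platform_op op = {
            .cmd = XENPF_get_symbol,
            .interface_version = XENPF_INTERFACE_VERSION,
        };
        static char name[128];        /* buffer for one symbol name */
        uint32_t symnum = 0;

        set_xen_guest_handle(op.u.symdata.name, name);

        for ( ;; )
        {
            op.u.symdata.namelen = sizeof(name);  /* IN: buffer size */
            op.u.symdata.symnum = symnum;         /* IN: symbol to read */

            if ( HYPERVISOR_platform_op(&op) )
                break;                            /* hypercall failed */

            /* OUT symnum unchanged from IN: no more symbols. */
            if ( op.u.symdata.symnum == symnum )
                break;
            symnum = op.u.symdata.symnum;

            /* A robust caller would also compare the OUT namelen against
             * sizeof(name) to detect a truncated (unterminated) name. */
            printk("%016llx %c %s\n",
                   (unsigned long long)op.u.symdata.address,
                   op.u.symdata.type, name);
        }
    }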
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
Tested-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
---
xen/arch/x86/platform_hypercall.c | 28 +++++++++++++++++++
xen/common/symbols.c | 54 +++++++++++++++++++++++++++++++++++++
xen/include/public/platform.h | 19 +++++++++++++
xen/include/xen/symbols.h | 3 +++
xen/include/xlat.lst | 1 +
xen/xsm/flask/hooks.c | 4 +++
xen/xsm/flask/policy/access_vectors | 2 ++
7 files changed, 111 insertions(+)
diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
index 334d474..7626261 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -23,6 +23,7 @@
#include <xen/cpu.h>
#include <xen/pmstat.h>
#include <xen/irq.h>
+#include <xen/symbols.h>
#include <asm/current.h>
#include <public/platform.h>
#include <acpi/cpufreq/processor_perf.h>
@@ -798,6 +799,33 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PARAM(xen_platform_op_t) u_xenpf_op)
}
break;
+ case XENPF_get_symbol:
+ {
+ static char name[KSYM_NAME_LEN + 1]; /* protected by xenpf_lock */
+ XEN_GUEST_HANDLE(char) nameh;
+ uint32_t namelen, copylen;
+
+ guest_from_compat_handle(nameh, op->u.symdata.name);
+
+ ret = xensyms_read(&op->u.symdata.symnum, &op->u.symdata.type,
+ &op->u.symdata.address, name);
+
+ namelen = strlen(name) + 1;
+
+ if ( namelen > op->u.symdata.namelen )
+ copylen = op->u.symdata.namelen;
+ else
+ copylen = namelen;
+
+ op->u.symdata.namelen = namelen;
+
+ if ( !ret && copy_to_guest(nameh, name, copylen) )
+ ret = -EFAULT;
+ if ( !ret && __copy_field_to_guest(u_xenpf_op, op, u.symdata) )
+ ret = -EFAULT;
+ }
+ break;
+
default:
ret = -ENOSYS;
break;
diff --git a/xen/common/symbols.c b/xen/common/symbols.c
index fc7c9e7..a59c59d 100644
--- a/xen/common/symbols.c
+++ b/xen/common/symbols.c
@@ -17,6 +17,8 @@
#include <xen/lib.h>
#include <xen/string.h>
#include <xen/spinlock.h>
+#include <public/platform.h>
+#include <xen/guest_access.h>
#ifdef SYMBOLS_ORIGIN
extern const unsigned int symbols_offsets[];
@@ -148,3 +150,55 @@ const char *symbols_lookup(unsigned long addr,
*offset = addr - symbols_address(low);
return namebuf;
}
+
+/*
+ * Get symbol type information. This is encoded as a single char at the
+ * beginning of the symbol name.
+ */
+static char symbols_get_symbol_type(unsigned int off)
+{
+ /*
+ * Get just the first code, look it up in the token table,
+ * and return the first char from this token.
+ */
+ return symbols_token_table[symbols_token_index[symbols_names[off + 1]]];
+}
+
+int xensyms_read(uint32_t *symnum, char *type,
+ uint64_t *address, char *name)
+{
+ /*
+ * Symbols are most likely accessed sequentially so we remember position
+ * from previous read. This can help us avoid the extra call to
+ * get_symbol_offset().
+ */
+ static uint64_t next_symbol, next_offset;
+ static DEFINE_SPINLOCK(symbols_mutex);
+
+ if ( *symnum > symbols_num_syms )
+ return -ERANGE;
+ if ( *symnum == symbols_num_syms )
+ {
+ /* No more symbols */
+ name[0] = '\0';
+ return 0;
+ }
+
+ spin_lock(&symbols_mutex);
+
+ if ( *symnum == 0 )
+ next_offset = next_symbol = 0;
+ if ( next_symbol != *symnum )
+ /* Non-sequential access */
+ next_offset = get_symbol_offset(*symnum);
+
+ *type = symbols_get_symbol_type(next_offset);
+ next_offset = symbols_expand_symbol(next_offset, name);
+ *address = symbols_address(*symnum);
+
+ next_symbol = ++*symnum;
+
+ spin_unlock(&symbols_mutex);
+
+ return 0;
+}
diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h
index 82ec84e..1e6a6ce 100644
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -590,6 +590,24 @@ struct xenpf_resource_op {
typedef struct xenpf_resource_op xenpf_resource_op_t;
DEFINE_XEN_GUEST_HANDLE(xenpf_resource_op_t);
+#define XENPF_get_symbol 63
+struct xenpf_symdata {
+ /* IN/OUT variables */
+ uint32_t namelen; /* IN: size of name buffer */
+ /* OUT: strlen(name) of hypervisor symbol (may be */
+ /* larger than what's been copied to guest) */
+ uint32_t symnum; /* IN: Symbol to read */
+ /* OUT: Next available symbol. If same as IN then */
+ /* we reached the end */
+
+ /* OUT variables */
+ XEN_GUEST_HANDLE(char) name;
+ uint64_t address;
+ char type;
+};
+typedef struct xenpf_symdata xenpf_symdata_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_symdata_t);
+
/*
* ` enum neg_errnoval
* ` HYPERVISOR_platform_op(const struct xen_platform_op*);
@@ -619,6 +637,7 @@ struct xen_platform_op {
struct xenpf_mem_hotadd mem_add;
struct xenpf_core_parking core_parking;
struct xenpf_resource_op resource_op;
+ struct xenpf_symdata symdata;
uint8_t pad[128];
} u;
};
diff --git a/xen/include/xen/symbols.h b/xen/include/xen/symbols.h
index 87cd77d..1fa0537 100644
--- a/xen/include/xen/symbols.h
+++ b/xen/include/xen/symbols.h
@@ -11,4 +11,7 @@ const char *symbols_lookup(unsigned long addr,
unsigned long *offset,
char *namebuf);
+int xensyms_read(uint32_t *symnum, char *type,
+ uint64_t *address, char *name);
+
#endif /*_XEN_SYMBOLS_H*/
diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
index 9c9fd9a..906e6fc 100644
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -89,6 +89,7 @@
? processor_px platform.h
! psd_package platform.h
? xenpf_enter_acpi_sleep platform.h
+! xenpf_symdata platform.h
? xenpf_pcpuinfo platform.h
? xenpf_pcpu_version platform.h
? xenpf_resource_entry platform.h
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 6e37d29..b4aae27 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1515,6 +1515,10 @@ static int flask_platform_op(uint32_t op)
return avc_current_has_perm(SECINITSID_XEN, SECCLASS_XEN2,
XEN2__RESOURCE_OP, NULL);
+ case XENPF_get_symbol:
+ return avc_has_perm(domain_sid(current->domain), SECINITSID_XEN,
+ SECCLASS_XEN2, XEN2__GET_SYMBOL, NULL);
+
default:
printk("flask_platform_op: Unknown op %d\n", op);
return -EPERM;
diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
index 68284d5..b35a150 100644
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -85,6 +85,8 @@ class xen2
resource_op
# XEN_SYSCTL_psr_cmt_op
psr_cmt_op
+# XENPF_get_symbol
+ get_symbol
}
# Classes domain and domain2 consist of operations that a domain performs on
--
1.8.1.4
* [PATCH v24 02/15] x86/VPMU: Add public xenpmu.h
2015-06-10 15:04 [PATCH v24 00/15] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
2015-06-10 15:04 ` [PATCH v24 01/15] common/symbols: Export hypervisor symbols to privileged guest Boris Ostrovsky
@ 2015-06-10 15:04 ` Boris Ostrovsky
2015-06-15 14:59 ` Jan Beulich
2015-06-10 15:04 ` [PATCH v24 03/15] x86/VPMU: Make vpmu not HVM-specific Boris Ostrovsky
` (12 subsequent siblings)
14 siblings, 1 reply; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-10 15:04 UTC
To: JBeulich, kevin.tian, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: boris.ostrovsky, tim, jun.nakajima, xen-devel
Add pmu.h header files and move into them the various macros and structures
that will be shared between the hypervisor and PV guests.
Move the MSR banks out of the architectural PMU structures to allow for larger
sizes in the future. The banks are allocated immediately after the context
structure, and the PMU structures store byte offsets to them (see the layout
sketch following the list below).
While making these updates, also:
* Remove the unused vpmu_domain() macro from vpmu.h
* Convert msraddr_to_bitpos() into an inline and make it a little faster by
exploiting the fact that all of Intel's PMU-related MSRs are in the lower MSR range.
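Condensed from the AMD hunks below for illustration, the resulting layout is
used as follows: the context is allocated with both register banks appended,
the structure records their byte offsets, and vpmu_reg_pointer() turns an
offset back into a typed pointer:

    struct xen_pmu_amd_ctxt *ctxt =
        xzalloc_bytes(sizeof(*ctxt) + 2 * sizeof(uint64_t) * num_counters);

    /* Offsets are relative to the start of the context structure. */
    ctxt->counters = sizeof(*ctxt);
    ctxt->ctrls = ctxt->counters + sizeof(uint64_t) * num_counters;

    /* Accesses recover typed pointers to the banks from the offsets. */
    uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
    uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);

    counter_regs[0] = 0;    /* e.g. first counter's saved MSR value */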
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v24
* Moved fixed/arch counter pointers in xen_pmu_intel_ctxt to the top (this will
become useful in patch 12)
* Added zero-sized MSR arrays to xen_pmu_intel_ctxt and xen_pmu_amd_ctxt for
better understanding (or so I think) of how these structures are used
xen/arch/x86/hvm/svm/vpmu.c | 83 +++++++++++----------
xen/arch/x86/hvm/vmx/vpmu_core2.c | 123 +++++++++++++++++--------------
xen/arch/x86/hvm/vpmu.c | 8 ++
xen/arch/x86/oprofile/op_model_ppro.c | 6 +-
xen/include/Makefile | 3 +-
xen/include/asm-x86/hvm/vmx/vpmu_core2.h | 32 --------
xen/include/asm-x86/hvm/vpmu.h | 16 ++--
xen/include/public/arch-arm.h | 5 ++
xen/include/public/arch-x86/pmu.h | 116 +++++++++++++++++++++++++++++
xen/include/public/pmu.h | 58 +++++++++++++++
xen/include/xlat.lst | 6 ++
11 files changed, 322 insertions(+), 134 deletions(-)
delete mode 100644 xen/include/asm-x86/hvm/vmx/vpmu_core2.h
create mode 100644 xen/include/public/arch-x86/pmu.h
create mode 100644 xen/include/public/pmu.h
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 6764070..a8b79df 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -30,10 +30,7 @@
#include <asm/apic.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/vpmu.h>
-
-#define F10H_NUM_COUNTERS 4
-#define F15H_NUM_COUNTERS 6
-#define MAX_NUM_COUNTERS F15H_NUM_COUNTERS
+#include <public/pmu.h>
#define MSR_F10H_EVNTSEL_GO_SHIFT 40
#define MSR_F10H_EVNTSEL_EN_SHIFT 22
@@ -49,6 +46,9 @@ static const u32 __read_mostly *counters;
static const u32 __read_mostly *ctrls;
static bool_t __read_mostly k7_counters_mirrored;
+#define F10H_NUM_COUNTERS 4
+#define F15H_NUM_COUNTERS 6
+
/* PMU Counter MSRs. */
static const u32 AMD_F10H_COUNTERS[] = {
MSR_K7_PERFCTR0,
@@ -83,12 +83,14 @@ static const u32 AMD_F15H_CTRLS[] = {
MSR_AMD_FAM15H_EVNTSEL5
};
-/* storage for context switching */
-struct amd_vpmu_context {
- u64 counters[MAX_NUM_COUNTERS];
- u64 ctrls[MAX_NUM_COUNTERS];
- bool_t msr_bitmap_set;
-};
+/* Use private context as a flag for MSR bitmap */
+#define msr_bitmap_on(vpmu) do { \
+ (vpmu)->priv_context = (void *)-1L; \
+ } while (0)
+#define msr_bitmap_off(vpmu) do { \
+ (vpmu)->priv_context = NULL; \
+ } while (0)
+#define is_msr_bitmap_on(vpmu) ((vpmu)->priv_context != NULL)
static inline int get_pmu_reg_type(u32 addr)
{
@@ -142,7 +144,6 @@ static void amd_vpmu_set_msr_bitmap(struct vcpu *v)
{
unsigned int i;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct amd_vpmu_context *ctxt = vpmu->context;
for ( i = 0; i < num_counters; i++ )
{
@@ -150,14 +151,13 @@ static void amd_vpmu_set_msr_bitmap(struct vcpu *v)
svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_WRITE);
}
- ctxt->msr_bitmap_set = 1;
+ msr_bitmap_on(vpmu);
}
static void amd_vpmu_unset_msr_bitmap(struct vcpu *v)
{
unsigned int i;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct amd_vpmu_context *ctxt = vpmu->context;
for ( i = 0; i < num_counters; i++ )
{
@@ -165,7 +165,7 @@ static void amd_vpmu_unset_msr_bitmap(struct vcpu *v)
svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_RW);
}
- ctxt->msr_bitmap_set = 0;
+ msr_bitmap_off(vpmu);
}
static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs)
@@ -177,19 +177,22 @@ static inline void context_load(struct vcpu *v)
{
unsigned int i;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct amd_vpmu_context *ctxt = vpmu->context;
+ struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
+ uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
+ uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
for ( i = 0; i < num_counters; i++ )
{
- wrmsrl(counters[i], ctxt->counters[i]);
- wrmsrl(ctrls[i], ctxt->ctrls[i]);
+ wrmsrl(counters[i], counter_regs[i]);
+ wrmsrl(ctrls[i], ctrl_regs[i]);
}
}
static void amd_vpmu_load(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct amd_vpmu_context *ctxt = vpmu->context;
+ struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
+ uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
vpmu_reset(vpmu, VPMU_FROZEN);
@@ -198,7 +201,7 @@ static void amd_vpmu_load(struct vcpu *v)
unsigned int i;
for ( i = 0; i < num_counters; i++ )
- wrmsrl(ctrls[i], ctxt->ctrls[i]);
+ wrmsrl(ctrls[i], ctrl_regs[i]);
return;
}
@@ -212,17 +215,17 @@ static inline void context_save(struct vcpu *v)
{
unsigned int i;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct amd_vpmu_context *ctxt = vpmu->context;
+ struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
+ uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
/* No need to save controls -- they are saved in amd_vpmu_do_wrmsr */
for ( i = 0; i < num_counters; i++ )
- rdmsrl(counters[i], ctxt->counters[i]);
+ rdmsrl(counters[i], counter_regs[i]);
}
static int amd_vpmu_save(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct amd_vpmu_context *ctx = vpmu->context;
unsigned int i;
/*
@@ -245,7 +248,7 @@ static int amd_vpmu_save(struct vcpu *v)
context_save(v);
if ( !vpmu_is_set(vpmu, VPMU_RUNNING) &&
- has_hvm_container_vcpu(v) && ctx->msr_bitmap_set )
+ has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
amd_vpmu_unset_msr_bitmap(v);
return 1;
@@ -256,7 +259,9 @@ static void context_update(unsigned int msr, u64 msr_content)
unsigned int i;
struct vcpu *v = current;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct amd_vpmu_context *ctxt = vpmu->context;
+ struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
+ uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
+ uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
if ( k7_counters_mirrored &&
((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)) )
@@ -268,12 +273,12 @@ static void context_update(unsigned int msr, u64 msr_content)
{
if ( msr == ctrls[i] )
{
- ctxt->ctrls[i] = msr_content;
+ ctrl_regs[i] = msr_content;
return;
}
else if (msr == counters[i] )
{
- ctxt->counters[i] = msr_content;
+ counter_regs[i] = msr_content;
return;
}
}
@@ -303,8 +308,7 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
return 1;
vpmu_set(vpmu, VPMU_RUNNING);
- if ( has_hvm_container_vcpu(v) &&
- !((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set )
+ if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
amd_vpmu_set_msr_bitmap(v);
}
@@ -313,8 +317,7 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
(is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) )
{
vpmu_reset(vpmu, VPMU_RUNNING);
- if ( has_hvm_container_vcpu(v) &&
- ((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set )
+ if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
amd_vpmu_unset_msr_bitmap(v);
release_pmu_ownship(PMU_OWNER_HVM);
}
@@ -355,7 +358,7 @@ static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
static int amd_vpmu_initialise(struct vcpu *v)
{
- struct amd_vpmu_context *ctxt;
+ struct xen_pmu_amd_ctxt *ctxt;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
uint8_t family = current_cpu_data.x86;
@@ -382,7 +385,8 @@ static int amd_vpmu_initialise(struct vcpu *v)
}
}
- ctxt = xzalloc(struct amd_vpmu_context);
+ ctxt = xzalloc_bytes(sizeof(*ctxt) +
+ 2 * sizeof(uint64_t) * num_counters);
if ( !ctxt )
{
gdprintk(XENLOG_WARNING, "Insufficient memory for PMU, "
@@ -391,7 +395,11 @@ static int amd_vpmu_initialise(struct vcpu *v)
return -ENOMEM;
}
+ ctxt->counters = sizeof(*ctxt);
+ ctxt->ctrls = ctxt->counters + sizeof(uint64_t) * num_counters;
+
vpmu->context = ctxt;
+ vpmu->priv_context = NULL;
vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
return 0;
}
@@ -400,8 +408,7 @@ static void amd_vpmu_destroy(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
- if ( has_hvm_container_vcpu(v) &&
- ((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set )
+ if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
amd_vpmu_unset_msr_bitmap(v);
xfree(vpmu->context);
@@ -418,7 +425,9 @@ static void amd_vpmu_destroy(struct vcpu *v)
static void amd_vpmu_dump(const struct vcpu *v)
{
const struct vpmu_struct *vpmu = vcpu_vpmu(v);
- const struct amd_vpmu_context *ctxt = vpmu->context;
+ const struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
+ const uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
+ const uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
unsigned int i;
printk(" VPMU state: 0x%x ", vpmu->flags);
@@ -448,8 +457,8 @@ static void amd_vpmu_dump(const struct vcpu *v)
rdmsrl(ctrls[i], ctrl);
rdmsrl(counters[i], cntr);
printk(" %#x: %#lx (%#lx in HW) %#x: %#lx (%#lx in HW)\n",
- ctrls[i], ctxt->ctrls[i], ctrl,
- counters[i], ctxt->counters[i], cntr);
+ ctrls[i], ctrl_regs[i], ctrl,
+ counters[i], counter_regs[i], cntr);
}
}
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index 311f35f..6fc634c 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -35,8 +35,8 @@
#include <asm/hvm/vmx/vmcs.h>
#include <public/sched.h>
#include <public/hvm/save.h>
+#include <public/pmu.h>
#include <asm/hvm/vpmu.h>
-#include <asm/hvm/vmx/vpmu_core2.h>
/*
* See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID
@@ -68,6 +68,10 @@
#define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_A_PERFCTR0))
static bool_t __read_mostly full_width_write;
+/* Intel-specific VPMU features */
+#define VPMU_CPU_HAS_DS 0x100 /* Has Debug Store */
+#define VPMU_CPU_HAS_BTS 0x200 /* Has Branch Trace Store */
+
/*
* MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed
* counters. 4 bits for every counter.
@@ -75,17 +79,6 @@ static bool_t __read_mostly full_width_write;
#define FIXED_CTR_CTRL_BITS 4
#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1)
-#define VPMU_CORE2_MAX_FIXED_PMCS 4
-struct core2_vpmu_context {
- u64 fixed_ctrl;
- u64 ds_area;
- u64 pebs_enable;
- u64 global_ovf_status;
- u64 enabled_cntrs; /* Follows PERF_GLOBAL_CTRL MSR format */
- u64 fix_counters[VPMU_CORE2_MAX_FIXED_PMCS];
- struct arch_msr_pair arch_msr_pair[1];
-};
-
/* Number of general-purpose and fixed performance counters */
static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
@@ -222,6 +215,12 @@ static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index)
}
}
+static inline int msraddr_to_bitpos(int x)
+{
+ ASSERT(x == (x & 0x1fff));
+ return x;
+}
+
static void core2_vpmu_set_msr_bitmap(unsigned long *msr_bitmap)
{
int i;
@@ -291,12 +290,15 @@ static void core2_vpmu_unset_msr_bitmap(unsigned long *msr_bitmap)
static inline void __core2_vpmu_save(struct vcpu *v)
{
int i;
- struct core2_vpmu_context *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+ struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+ uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
+ struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
+ vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
for ( i = 0; i < fixed_pmc_cnt; i++ )
- rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, core2_vpmu_cxt->fix_counters[i]);
+ rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
for ( i = 0; i < arch_pmc_cnt; i++ )
- rdmsrl(MSR_IA32_PERFCTR0 + i, core2_vpmu_cxt->arch_msr_pair[i].counter);
+ rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter);
}
static int core2_vpmu_save(struct vcpu *v)
@@ -319,10 +321,13 @@ static int core2_vpmu_save(struct vcpu *v)
static inline void __core2_vpmu_load(struct vcpu *v)
{
unsigned int i, pmc_start;
- struct core2_vpmu_context *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+ struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+ uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
+ struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
+ vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
for ( i = 0; i < fixed_pmc_cnt; i++ )
- wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, core2_vpmu_cxt->fix_counters[i]);
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
if ( full_width_write )
pmc_start = MSR_IA32_A_PERFCTR0;
@@ -330,8 +335,8 @@ static inline void __core2_vpmu_load(struct vcpu *v)
pmc_start = MSR_IA32_PERFCTR0;
for ( i = 0; i < arch_pmc_cnt; i++ )
{
- wrmsrl(pmc_start + i, core2_vpmu_cxt->arch_msr_pair[i].counter);
- wrmsrl(MSR_P6_EVNTSEL(i), core2_vpmu_cxt->arch_msr_pair[i].control);
+ wrmsrl(pmc_start + i, xen_pmu_cntr_pair[i].counter);
+ wrmsrl(MSR_P6_EVNTSEL(i), xen_pmu_cntr_pair[i].control);
}
wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl);
@@ -354,7 +359,8 @@ static void core2_vpmu_load(struct vcpu *v)
static int core2_vpmu_alloc_resource(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct core2_vpmu_context *core2_vpmu_cxt;
+ struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL;
+ uint64_t *p = NULL;
if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
return 0;
@@ -367,12 +373,20 @@ static int core2_vpmu_alloc_resource(struct vcpu *v)
goto out_err;
vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
- core2_vpmu_cxt = xzalloc_bytes(sizeof(struct core2_vpmu_context) +
- (arch_pmc_cnt-1)*sizeof(struct arch_msr_pair));
- if ( !core2_vpmu_cxt )
+ core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
+ sizeof(uint64_t) * fixed_pmc_cnt +
+ sizeof(struct xen_pmu_cntr_pair) *
+ arch_pmc_cnt);
+ p = xzalloc(uint64_t);
+ if ( !core2_vpmu_cxt || !p )
goto out_err;
- vpmu->context = (void *)core2_vpmu_cxt;
+ core2_vpmu_cxt->fixed_counters = sizeof(*core2_vpmu_cxt);
+ core2_vpmu_cxt->arch_counters = core2_vpmu_cxt->fixed_counters +
+ sizeof(uint64_t) * fixed_pmc_cnt;
+
+ vpmu->context = core2_vpmu_cxt;
+ vpmu->priv_context = p;
vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
@@ -381,6 +395,9 @@ static int core2_vpmu_alloc_resource(struct vcpu *v)
out_err:
release_pmu_ownship(PMU_OWNER_HVM);
+ xfree(core2_vpmu_cxt);
+ xfree(p);
+
printk("Failed to allocate VPMU resources for domain %u vcpu %u\n",
v->vcpu_id, v->domain->domain_id);
@@ -418,7 +435,8 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
int type = -1, index = -1;
struct vcpu *v = current;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct core2_vpmu_context *core2_vpmu_cxt = NULL;
+ struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
+ uint64_t *enabled_cntrs;
if ( !core2_vpmu_msr_common_check(msr, &type, &index) )
{
@@ -446,10 +464,11 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
ASSERT(!supported);
core2_vpmu_cxt = vpmu->context;
+ enabled_cntrs = vpmu->priv_context;
switch ( msr )
{
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- core2_vpmu_cxt->global_ovf_status &= ~msr_content;
+ core2_vpmu_cxt->global_status &= ~msr_content;
return 1;
case MSR_CORE_PERF_GLOBAL_STATUS:
gdprintk(XENLOG_INFO, "Can not write readonly MSR: "
@@ -483,15 +502,14 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
break;
case MSR_CORE_PERF_FIXED_CTR_CTRL:
vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
- core2_vpmu_cxt->enabled_cntrs &=
- ~(((1ULL << VPMU_CORE2_MAX_FIXED_PMCS) - 1) << 32);
+ *enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32);
if ( msr_content != 0 )
{
u64 val = msr_content;
for ( i = 0; i < fixed_pmc_cnt; i++ )
{
if ( val & 3 )
- core2_vpmu_cxt->enabled_cntrs |= (1ULL << 32) << i;
+ *enabled_cntrs |= (1ULL << 32) << i;
val >>= FIXED_CTR_CTRL_BITS;
}
}
@@ -502,19 +520,21 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
tmp = msr - MSR_P6_EVNTSEL(0);
if ( tmp >= 0 && tmp < arch_pmc_cnt )
{
+ struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
+ vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+
vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
if ( msr_content & (1ULL << 22) )
- core2_vpmu_cxt->enabled_cntrs |= 1ULL << tmp;
+ *enabled_cntrs |= 1ULL << tmp;
else
- core2_vpmu_cxt->enabled_cntrs &= ~(1ULL << tmp);
+ *enabled_cntrs &= ~(1ULL << tmp);
- core2_vpmu_cxt->arch_msr_pair[tmp].control = msr_content;
+ xen_pmu_cntr_pair[tmp].control = msr_content;
}
}
- if ( (global_ctrl & core2_vpmu_cxt->enabled_cntrs) ||
- (core2_vpmu_cxt->ds_area != 0) )
+ if ( (global_ctrl & *enabled_cntrs) || (core2_vpmu_cxt->ds_area != 0) )
vpmu_set(vpmu, VPMU_RUNNING);
else
vpmu_reset(vpmu, VPMU_RUNNING);
@@ -560,7 +580,7 @@ static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
int type = -1, index = -1;
struct vcpu *v = current;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct core2_vpmu_context *core2_vpmu_cxt = NULL;
+ struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
if ( core2_vpmu_msr_common_check(msr, &type, &index) )
{
@@ -571,7 +591,7 @@ static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
*msr_content = 0;
break;
case MSR_CORE_PERF_GLOBAL_STATUS:
- *msr_content = core2_vpmu_cxt->global_ovf_status;
+ *msr_content = core2_vpmu_cxt->global_status;
break;
case MSR_CORE_PERF_GLOBAL_CTRL:
vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
@@ -620,10 +640,12 @@ static void core2_vpmu_dump(const struct vcpu *v)
{
const struct vpmu_struct *vpmu = vcpu_vpmu(v);
unsigned int i;
- const struct core2_vpmu_context *core2_vpmu_cxt = NULL;
+ const struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
u64 val;
+ uint64_t *fixed_counters;
+ struct xen_pmu_cntr_pair *cntr_pair;
- if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+ if ( !core2_vpmu_cxt || !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
return;
if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
@@ -636,16 +658,15 @@ static void core2_vpmu_dump(const struct vcpu *v)
}
printk(" vPMU running\n");
- core2_vpmu_cxt = vpmu->context;
+
+ cntr_pair = vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+ fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
/* Print the contents of the counter and its configuration msr. */
for ( i = 0; i < arch_pmc_cnt; i++ )
- {
- const struct arch_msr_pair *msr_pair = core2_vpmu_cxt->arch_msr_pair;
-
printk(" general_%d: 0x%016lx ctrl: 0x%016lx\n",
- i, msr_pair[i].counter, msr_pair[i].control);
- }
+ i, cntr_pair[i].counter, cntr_pair[i].control);
+
/*
* The configuration of the fixed counter is 4 bits each in the
* MSR_CORE_PERF_FIXED_CTR_CTRL.
@@ -654,7 +675,7 @@ static void core2_vpmu_dump(const struct vcpu *v)
for ( i = 0; i < fixed_pmc_cnt; i++ )
{
printk(" fixed_%d: 0x%016lx ctrl: %#lx\n",
- i, core2_vpmu_cxt->fix_counters[i],
+ i, fixed_counters[i],
val & FIXED_CTR_CTRL_MASK);
val >>= FIXED_CTR_CTRL_BITS;
}
@@ -665,14 +686,14 @@ static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
struct vcpu *v = current;
u64 msr_content;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct core2_vpmu_context *core2_vpmu_cxt = vpmu->context;
+ struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
if ( msr_content )
{
if ( is_pmc_quirk )
handle_pmc_quirk(msr_content);
- core2_vpmu_cxt->global_ovf_status |= msr_content;
+ core2_vpmu_cxt->global_status |= msr_content;
msr_content = 0xC000000700000000 | ((1 << arch_pmc_cnt) - 1);
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
}
@@ -739,13 +760,6 @@ static int core2_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags)
arch_pmc_cnt = core2_get_arch_pmc_count();
fixed_pmc_cnt = core2_get_fixed_pmc_count();
- if ( fixed_pmc_cnt > VPMU_CORE2_MAX_FIXED_PMCS )
- {
- fixed_pmc_cnt = VPMU_CORE2_MAX_FIXED_PMCS;
- printk(XENLOG_G_WARNING "Limiting number of fixed counters to %d\n",
- fixed_pmc_cnt);
- }
-
check_pmc_quirk();
return 0;
}
@@ -755,6 +769,7 @@ static void core2_vpmu_destroy(struct vcpu *v)
struct vpmu_struct *vpmu = vcpu_vpmu(v);
xfree(vpmu->context);
+ xfree(vpmu->priv_context);
if ( has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
release_pmu_ownship(PMU_OWNER_HVM);
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index c3273ee..d829d68 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -32,6 +32,11 @@
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/apic.h>
+#include <public/pmu.h>
+
+#include <compat/pmu.h>
+CHECK_pmu_cntr_pair;
+CHECK_pmu_data;
/*
* "vpmu" : vpmu generally enabled
@@ -233,6 +238,9 @@ void vpmu_initialise(struct vcpu *v)
uint8_t vendor = current_cpu_data.x86_vendor;
int ret;
+ BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
+ BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
+
if ( is_pvh_vcpu(v) )
return;
diff --git a/xen/arch/x86/oprofile/op_model_ppro.c b/xen/arch/x86/oprofile/op_model_ppro.c
index aa99e4d..ca429a1 100644
--- a/xen/arch/x86/oprofile/op_model_ppro.c
+++ b/xen/arch/x86/oprofile/op_model_ppro.c
@@ -20,11 +20,15 @@
#include <asm/regs.h>
#include <asm/current.h>
#include <asm/hvm/vpmu.h>
-#include <asm/hvm/vmx/vpmu_core2.h>
#include "op_x86_model.h"
#include "op_counter.h"
+struct arch_msr_pair {
+ u64 counter;
+ u64 control;
+};
+
/*
* Intel "Architectural Performance Monitoring" CPUID
* detection/enumeration details:
diff --git a/xen/include/Makefile b/xen/include/Makefile
index 7c7f6a8..6664107 100644
--- a/xen/include/Makefile
+++ b/xen/include/Makefile
@@ -22,10 +22,11 @@ headers-y := \
compat/version.h \
compat/xen.h \
compat/xenoprof.h
+headers-$(CONFIG_X86) += compat/arch-x86/pmu.h
headers-$(CONFIG_X86) += compat/arch-x86/xen-mca.h
headers-$(CONFIG_X86) += compat/arch-x86/xen.h
headers-$(CONFIG_X86) += compat/arch-x86/xen-$(compat-arch-y).h
-headers-y += compat/arch-$(compat-arch-y).h compat/xlat.h
+headers-y += compat/arch-$(compat-arch-y).h compat/pmu.h compat/xlat.h
headers-$(FLASK_ENABLE) += compat/xsm/flask_op.h
cppflags-y := -include public/xen-compat.h
diff --git a/xen/include/asm-x86/hvm/vmx/vpmu_core2.h b/xen/include/asm-x86/hvm/vmx/vpmu_core2.h
deleted file mode 100644
index 410372d..0000000
--- a/xen/include/asm-x86/hvm/vmx/vpmu_core2.h
+++ /dev/null
@@ -1,32 +0,0 @@
-
-/*
- * vpmu_core2.h: CORE 2 specific PMU virtualization for HVM domain.
- *
- * Copyright (c) 2007, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Author: Haitao Shan <haitao.shan@intel.com>
- */
-
-#ifndef __ASM_X86_HVM_VPMU_CORE_H_
-#define __ASM_X86_HVM_VPMU_CORE_H_
-
-struct arch_msr_pair {
- u64 counter;
- u64 control;
-};
-
-#endif /* __ASM_X86_HVM_VPMU_CORE_H_ */
-
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index 9c4e65a..83eea7e 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -22,6 +22,8 @@
#ifndef __ASM_X86_HVM_VPMU_H_
#define __ASM_X86_HVM_VPMU_H_
+#include <public/pmu.h>
+
/*
* Flag bits given as a string on the hypervisor boot parameter 'vpmu'.
* See arch/x86/hvm/vpmu.c.
@@ -29,12 +31,9 @@
#define VPMU_BOOT_ENABLED 0x1 /* vpmu generally enabled. */
#define VPMU_BOOT_BTS 0x2 /* Intel BTS feature wanted. */
-
-#define msraddr_to_bitpos(x) (((x)&0xffff) + ((x)>>31)*0x2000)
#define vcpu_vpmu(vcpu) (&((vcpu)->arch.hvm_vcpu.vpmu))
#define vpmu_vcpu(vpmu) (container_of((vpmu), struct vcpu, \
arch.hvm_vcpu.vpmu))
-#define vpmu_domain(vpmu) (vpmu_vcpu(vpmu)->domain)
#define MSR_TYPE_COUNTER 0
#define MSR_TYPE_CTRL 1
@@ -42,6 +41,9 @@
#define MSR_TYPE_ARCH_COUNTER 3
#define MSR_TYPE_ARCH_CTRL 4
+/* Start of PMU register bank */
+#define vpmu_reg_pointer(ctxt, offset) ((void *)((uintptr_t)ctxt + \
+ (uintptr_t)ctxt->offset))
/* Arch specific operations shared by all vpmus */
struct arch_vpmu_ops {
@@ -65,7 +67,8 @@ struct vpmu_struct {
u32 flags;
u32 last_pcpu;
u32 hw_lapic_lvtpc;
- void *context;
+ void *context; /* May be shared with PV guest */
+ void *priv_context; /* hypervisor-only */
struct arch_vpmu_ops *arch_vpmu_ops;
};
@@ -77,11 +80,6 @@ struct vpmu_struct {
#define VPMU_FROZEN 0x10 /* Stop counters while VCPU is not running */
#define VPMU_PASSIVE_DOMAIN_ALLOCATED 0x20
-/* VPMU features */
-#define VPMU_CPU_HAS_DS 0x100 /* Has Debug Store */
-#define VPMU_CPU_HAS_BTS 0x200 /* Has Branch Trace Store */
-
-
static inline void vpmu_set(struct vpmu_struct *vpmu, const u32 mask)
{
vpmu->flags |= mask;
diff --git a/xen/include/public/arch-arm.h b/xen/include/public/arch-arm.h
index 6f24c5f..d127746 100644
--- a/xen/include/public/arch-arm.h
+++ b/xen/include/public/arch-arm.h
@@ -434,6 +434,11 @@ typedef uint64_t xen_callback_t;
#endif
+#ifndef __ASSEMBLY__
+/* Stub definition of PMU structure */
+typedef struct xen_pmu_arch { uint8_t dummy; } xen_pmu_arch_t;
+#endif
+
#endif /* __XEN_PUBLIC_ARCH_ARM_H__ */
/*
diff --git a/xen/include/public/arch-x86/pmu.h b/xen/include/public/arch-x86/pmu.h
new file mode 100644
index 0000000..4351115
--- /dev/null
+++ b/xen/include/public/arch-x86/pmu.h
@@ -0,0 +1,116 @@
+#ifndef __XEN_PUBLIC_ARCH_X86_PMU_H__
+#define __XEN_PUBLIC_ARCH_X86_PMU_H__
+
+/* x86-specific PMU definitions */
+
+/* AMD PMU registers and structures */
+struct xen_pmu_amd_ctxt {
+ /* Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd) */
+ uint32_t counters;
+ uint32_t ctrls;
+
+ /* Counter MSRs */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ uint64_t regs[];
+#elif defined(__GNUC__)
+ uint64_t regs[0];
+#endif
+};
+typedef struct xen_pmu_amd_ctxt xen_pmu_amd_ctxt_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pmu_amd_ctxt_t);
+
+/* Intel PMU registers and structures */
+struct xen_pmu_cntr_pair {
+ uint64_t counter;
+ uint64_t control;
+};
+typedef struct xen_pmu_cntr_pair xen_pmu_cntr_pair_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pmu_cntr_pair_t);
+
+struct xen_pmu_intel_ctxt {
+ /*
+ * Offsets to fixed and architectural counter MSRs (relative to
+ * xen_pmu_arch.c.intel)
+ */
+ uint32_t fixed_counters;
+ uint32_t arch_counters;
+
+ /* PMU registers */
+ uint64_t global_ctrl;
+ uint64_t global_ovf_ctrl;
+ uint64_t global_status;
+ uint64_t fixed_ctrl;
+ uint64_t ds_area;
+ uint64_t pebs_enable;
+ uint64_t debugctl;
+
+ /* Fixed and architectural counter MSRs */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ uint64_t regs[];
+#elif defined(__GNUC__)
+ uint64_t regs[0];
+#endif
+};
+typedef struct xen_pmu_intel_ctxt xen_pmu_intel_ctxt_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pmu_intel_ctxt_t);
+
+/* Sampled domain's registers */
+struct xen_pmu_regs {
+ uint64_t ip;
+ uint64_t sp;
+ uint64_t flags;
+ uint16_t cs;
+ uint16_t ss;
+ uint8_t cpl;
+ uint8_t pad[3];
+};
+typedef struct xen_pmu_regs xen_pmu_regs_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pmu_regs_t);
+
+/* PMU flags */
+#define PMU_CACHED (1<<0) /* PMU MSRs are cached in the context */
+
+/*
+ * Architecture-specific information describing state of the processor at
+ * the time of PMU interrupt.
+ * Fields of this structure marked as RW for guest should only be written by
+ * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the
+ * hypervisor during PMU interrupt). Hypervisor will read updated data in
+ * XENPMU_flush hypercall and clear PMU_CACHED bit.
+ */
+struct xen_pmu_arch {
+ union {
+ /*
+ * Processor's registers at the time of interrupt.
+ * WO for hypervisor, RO for guests.
+ */
+ struct xen_pmu_regs regs;
+ /* Padding for adding new registers to xen_pmu_regs in the future */
+#define XENPMU_REGS_PAD_SZ 64
+ uint8_t pad[XENPMU_REGS_PAD_SZ];
+ } r;
+
+ /* WO for hypervisor, RO for guest */
+ uint64_t pmu_flags;
+
+ /* Placeholder for APIC LVTPC register */
+ uint64_t lvtpc_pad;
+
+ /* Placeholder for vendor-specific PMU registers */
+#define XENPMU_CTXT_PAD_SZ 128
+ uint64_t pmu_regs_pad[XENPMU_CTXT_PAD_SZ / 8];
+};
+typedef struct xen_pmu_arch xen_pmu_arch_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pmu_arch_t);
+
+#endif /* __XEN_PUBLIC_ARCH_X86_PMU_H__ */
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
new file mode 100644
index 0000000..844159e
--- /dev/null
+++ b/xen/include/public/pmu.h
@@ -0,0 +1,58 @@
+#ifndef __XEN_PUBLIC_PMU_H__
+#define __XEN_PUBLIC_PMU_H__
+
+#include "xen.h"
+#if defined(__i386__) || defined(__x86_64__)
+#include "arch-x86/pmu.h"
+#elif defined (__arm__) || defined (__aarch64__)
+#include "arch-arm.h"
+#else
+#error "Unsupported architecture"
+#endif
+
+#define XENPMU_VER_MAJ 0
+#define XENPMU_VER_MIN 1
+
+/*
+ * Shared PMU data between hypervisor and PV(H) domains.
+ *
+ * The hypervisor fills out this structure during PMU interrupt and sends an
+ * interrupt to appropriate VCPU.
+ * Architecture-independent fields of xen_pmu_data are WO for the hypervisor
+ * and RO for the guest but some fields in xen_pmu_arch can be writable
+ * by both the hypervisor and the guest (see arch-$arch/pmu.h).
+ */
+struct xen_pmu_data {
+ /* Interrupted VCPU */
+ uint32_t vcpu_id;
+
+ /*
+ * Physical processor on which the interrupt occurred. On non-privileged
+ * guests set to vcpu_id;
+ */
+ uint32_t pcpu_id;
+
+ /*
+ * Domain that was interrupted. On non-privileged guests set to DOMID_SELF.
+ * On privileged guests can be DOMID_SELF, DOMID_XEN, or, when in
+ * XENPMU_MODE_ALL mode, domain ID of another domain.
+ */
+ domid_t domain_id;
+
+ uint8_t pad[6];
+
+ /* Architecture-specific information */
+ struct xen_pmu_arch pmu;
+};
+
+#endif /* __XEN_PUBLIC_PMU_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
index 906e6fc..997d2db 100644
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -8,6 +8,11 @@
! start_info xen.h
? vcpu_info xen.h
? vcpu_time_info xen.h
+? pmu_amd_ctxt arch-x86/pmu.h
+? pmu_arch arch-x86/pmu.h
+? pmu_cntr_pair arch-x86/pmu.h
+? pmu_intel_ctxt arch-x86/pmu.h
+? pmu_regs arch-x86/pmu.h
! cpu_user_regs arch-x86/xen-@arch@.h
! trap_info arch-x86/xen.h
? cpu_offline_action arch-x86/xen-mca.h
@@ -93,6 +98,7 @@
? xenpf_pcpuinfo platform.h
? xenpf_pcpu_version platform.h
? xenpf_resource_entry platform.h
+? pmu_data pmu.h
! sched_poll sched.h
? sched_remote_shutdown sched.h
? sched_shutdown sched.h
--
1.8.1.4
* Re: [PATCH v24 02/15] x86/VPMU: Add public xenpmu.h
2015-06-10 15:04 ` [PATCH v24 02/15] x86/VPMU: Add public xenpmu.h Boris Ostrovsky
@ 2015-06-15 14:59 ` Jan Beulich
0 siblings, 0 replies; 36+ messages in thread
From: Jan Beulich @ 2015-06-15 14:59 UTC
To: Boris Ostrovsky
Cc: kevin.tian, suravee.suthikulpanit, andrew.cooper3, tim,
dietmar.hahn, xen-devel, Aravind.Gopalakrishnan, jun.nakajima,
dgdegra
>>> On 10.06.15 at 17:04, <boris.ostrovsky@oracle.com> wrote:
> Add pmu.h header files, move various macros and structures that will be
> shared between hypervisor and PV guests to it.
>
> Move MSR banks out of architectural PMU structures to allow for larger sizes
> in the future. The banks are allocated immediately after the context and
> PMU structures store offsets to them.
>
> While making these updates, also:
> * Remove unused vpmu_domain() macro from vpmu.h
> * Convert msraddr_to_bitpos() into an inline and make it a little faster by
> realizing that all Intel's PMU-related MSRs are in the lower MSR range.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
* [PATCH v24 03/15] x86/VPMU: Make vpmu not HVM-specific
2015-06-10 15:04 [PATCH v24 00/15] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
2015-06-10 15:04 ` [PATCH v24 01/15] common/symbols: Export hypervisor symbols to privileged guest Boris Ostrovsky
2015-06-10 15:04 ` [PATCH v24 02/15] x86/VPMU: Add public xenpmu.h Boris Ostrovsky
@ 2015-06-10 15:04 ` Boris Ostrovsky
2015-06-10 15:04 ` [PATCH v24 04/15] x86/VPMU: Interface for setting PMU mode and flags Boris Ostrovsky
` (11 subsequent siblings)
14 siblings, 0 replies; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-10 15:04 UTC
To: JBeulich, kevin.tian, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: boris.ostrovsky, tim, jun.nakajima, xen-devel
The vpmu structure will be used for both HVM and PV guests, so move it from
hvm_vcpu to arch_vcpu.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
Tested-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
---
xen/include/asm-x86/domain.h | 2 ++
xen/include/asm-x86/hvm/vcpu.h | 3 ---
xen/include/asm-x86/hvm/vpmu.h | 5 ++---
3 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index a3c117f..4f2d6aa 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -446,6 +446,8 @@ struct arch_vcpu
void (*ctxt_switch_from) (struct vcpu *);
void (*ctxt_switch_to) (struct vcpu *);
+ struct vpmu_struct vpmu;
+
/* Virtual Machine Extensions */
union {
struct pv_vcpu pv_vcpu;
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index 3d8f4dc..0faf60d 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -151,9 +151,6 @@ struct hvm_vcpu {
u32 msr_tsc_aux;
u64 msr_tsc_adjust;
- /* VPMU */
- struct vpmu_struct vpmu;
-
union {
struct arch_vmx_struct vmx;
struct arch_svm_struct svm;
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index 83eea7e..82bfa0e 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -31,9 +31,8 @@
#define VPMU_BOOT_ENABLED 0x1 /* vpmu generally enabled. */
#define VPMU_BOOT_BTS 0x2 /* Intel BTS feature wanted. */
-#define vcpu_vpmu(vcpu) (&((vcpu)->arch.hvm_vcpu.vpmu))
-#define vpmu_vcpu(vpmu) (container_of((vpmu), struct vcpu, \
- arch.hvm_vcpu.vpmu))
+#define vcpu_vpmu(vcpu) (&(vcpu)->arch.vpmu)
+#define vpmu_vcpu(vpmu) container_of((vpmu), struct vcpu, arch.vpmu)
#define MSR_TYPE_COUNTER 0
#define MSR_TYPE_CTRL 1
--
1.8.1.4
* [PATCH v24 04/15] x86/VPMU: Interface for setting PMU mode and flags
2015-06-10 15:04 [PATCH v24 00/15] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
` (2 preceding siblings ...)
2015-06-10 15:04 ` [PATCH v24 03/15] x86/VPMU: Make vpmu not HVM-specific Boris Ostrovsky
@ 2015-06-10 15:04 ` Boris Ostrovsky
2015-06-11 8:17 ` Tian, Kevin
2015-06-10 15:04 ` [PATCH v24 05/15] x86/VPMU: Initialize VPMUs with __initcall Boris Ostrovsky
` (10 subsequent siblings)
14 siblings, 1 reply; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-10 15:04 UTC (permalink / raw)
To: JBeulich, kevin.tian, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: boris.ostrovsky, tim, jun.nakajima, xen-devel
Add a runtime interface for setting PMU mode and flags. Three main modes are
provided:
* XENPMU_MODE_OFF: PMU is not virtualized.
* XENPMU_MODE_SELF: Guests can access PMU MSRs and receive PMU interrupts.
* XENPMU_MODE_HV: Same as XENPMU_MODE_SELF for non-privileged guests; dom0
can additionally profile itself and the hypervisor.
Note that PMU modes are distinct from what can be selected on Xen's boot line
with the 'vpmu' argument: an 'off' (or '0') value is equivalent to
XENPMU_MODE_OFF, while any other value causes the VPMU mode to be set to
XENPMU_MODE_SELF during boot.
For feature flags, only Intel's BTS is currently supported.
Mode and flags are set via the HYPERVISOR_xenpmu_op hypercall.
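As an illustration only (not part of the patch), a dom0 caller switching the
mode might look like the sketch below; the guest-side HYPERVISOR_xenpmu_op()
wrapper and the set_vpmu_mode() helper are assumed, while the op code,
version check and xen_pmu_params layout come from this series:

    static int set_vpmu_mode(uint64_t mode)
    {
        struct xen_pmu_params params = {
            /* Major version is checked by XENPMU_mode_set. */
            .version.maj = XENPMU_VER_MAJ,
            .version.min = XENPMU_VER_MIN,
            .val = mode,          /* XENPMU_MODE_SELF or XENPMU_MODE_HV */
        };

        /*
         * The hypervisor returns -EINVAL for an unknown mode and -EBUSY
         * when other VPMUs are active and the mode cannot be changed.
         */
        return HYPERVISOR_xenpmu_op(XENPMU_mode_set, &params);
    }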
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
Acked-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
---
tools/flask/policy/policy/modules/xen/xen.te | 3 +
xen/arch/x86/domain.c | 4 +-
xen/arch/x86/hvm/svm/vpmu.c | 4 +-
xen/arch/x86/hvm/vmx/vpmu_core2.c | 10 +-
xen/arch/x86/hvm/vpmu.c | 159 +++++++++++++++++++++++++--
xen/arch/x86/x86_64/compat/entry.S | 4 +
xen/arch/x86/x86_64/entry.S | 4 +
xen/include/asm-x86/hvm/vpmu.h | 27 +++--
xen/include/public/pmu.h | 46 ++++++++
xen/include/public/xen.h | 1 +
xen/include/xen/hypercall.h | 4 +
xen/include/xlat.lst | 1 +
xen/include/xsm/dummy.h | 15 +++
xen/include/xsm/xsm.h | 6 +
xen/xsm/dummy.c | 1 +
xen/xsm/flask/hooks.c | 18 +++
xen/xsm/flask/policy/access_vectors | 2 +
17 files changed, 284 insertions(+), 25 deletions(-)
diff --git a/tools/flask/policy/policy/modules/xen/xen.te b/tools/flask/policy/policy/modules/xen/xen.te
index 51f59c5..45b5cb2 100644
--- a/tools/flask/policy/policy/modules/xen/xen.te
+++ b/tools/flask/policy/policy/modules/xen/xen.te
@@ -68,6 +68,9 @@ allow dom0_t xen_t:xen2 {
resource_op
psr_cmt_op
};
+allow dom0_t xen_t:xen2 {
+ pmu_ctrl
+};
allow dom0_t xen_t:mmu memorymap;
# Allow dom0 to use these domctls on itself. For domctls acting on other
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index db073a6..8cc3ae7 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1540,7 +1540,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
if ( is_hvm_vcpu(prev) )
{
if (prev != next)
- vpmu_save(prev);
+ vpmu_switch_from(prev);
if ( !list_empty(&prev->arch.hvm_vcpu.tm_list) )
pt_save_timer(prev);
@@ -1585,7 +1585,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
if (is_hvm_vcpu(next) && (prev != next) )
/* Must be done with interrupts enabled */
- vpmu_load(next);
+ vpmu_switch_to(next);
context_saved(prev);
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index a8b79df..481ea7b 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -472,14 +472,14 @@ struct arch_vpmu_ops amd_vpmu_ops = {
.arch_vpmu_dump = amd_vpmu_dump
};
-int svm_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags)
+int svm_vpmu_initialise(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
uint8_t family = current_cpu_data.x86;
int ret = 0;
/* vpmu enabled? */
- if ( !vpmu_flags )
+ if ( vpmu_mode == XENPMU_MODE_OFF )
return 0;
switch ( family )
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index 6fc634c..cfcdf42 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -708,13 +708,13 @@ static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
return 1;
}
-static int core2_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags)
+static int core2_vpmu_initialise(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
u64 msr_content;
static bool_t ds_warned;
- if ( !(vpmu_flags & VPMU_BOOT_BTS) )
+ if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
goto func_out;
/* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */
while ( boot_cpu_has(X86_FEATURE_DS) )
@@ -826,7 +826,7 @@ struct arch_vpmu_ops core2_no_vpmu_ops = {
.do_cpuid = core2_no_vpmu_do_cpuid,
};
-int vmx_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags)
+int vmx_vpmu_initialise(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
uint8_t family = current_cpu_data.x86;
@@ -834,7 +834,7 @@ int vmx_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags)
int ret = 0;
vpmu->arch_vpmu_ops = &core2_no_vpmu_ops;
- if ( !vpmu_flags )
+ if ( vpmu_mode == XENPMU_MODE_OFF )
return 0;
if ( family == 6 )
@@ -884,7 +884,7 @@ int vmx_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags)
/* next gen Xeon Phi */
case 0x57:
- ret = core2_vpmu_initialise(v, vpmu_flags);
+ ret = core2_vpmu_initialise(v);
if ( !ret )
vpmu->arch_vpmu_ops = &core2_vpmu_ops;
return ret;
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index d829d68..5cfb2f0 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -21,6 +21,8 @@
#include <xen/config.h>
#include <xen/sched.h>
#include <xen/xenoprof.h>
+#include <xen/event.h>
+#include <xen/guest_access.h>
#include <asm/regs.h>
#include <asm/types.h>
#include <asm/msr.h>
@@ -33,10 +35,12 @@
#include <asm/hvm/svm/vmcb.h>
#include <asm/apic.h>
#include <public/pmu.h>
+#include <xsm/xsm.h>
#include <compat/pmu.h>
CHECK_pmu_cntr_pair;
CHECK_pmu_data;
+CHECK_pmu_params;
/*
* "vpmu" : vpmu generally enabled
@@ -44,9 +48,14 @@ CHECK_pmu_data;
* "vpmu=bts" : vpmu enabled and Intel BTS feature switched on.
*/
static unsigned int __read_mostly opt_vpmu_enabled;
+unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF;
+unsigned int __read_mostly vpmu_features = 0;
static void parse_vpmu_param(char *s);
custom_param("vpmu", parse_vpmu_param);
+static DEFINE_SPINLOCK(vpmu_lock);
+static unsigned vpmu_count;
+
static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
static void __init parse_vpmu_param(char *s)
@@ -57,7 +66,7 @@ static void __init parse_vpmu_param(char *s)
break;
default:
if ( !strcmp(s, "bts") )
- opt_vpmu_enabled |= VPMU_BOOT_BTS;
+ vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
else if ( *s )
{
printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
@@ -65,7 +74,9 @@ static void __init parse_vpmu_param(char *s)
}
/* fall through */
case 1:
- opt_vpmu_enabled |= VPMU_BOOT_ENABLED;
+ /* Default VPMU mode */
+ vpmu_mode = XENPMU_MODE_SELF;
+ opt_vpmu_enabled = 1;
break;
}
}
@@ -74,7 +85,7 @@ void vpmu_lvtpc_update(uint32_t val)
{
struct vpmu_struct *vpmu;
- if ( !opt_vpmu_enabled )
+ if ( vpmu_mode == XENPMU_MODE_OFF )
return;
vpmu = vcpu_vpmu(current);
@@ -87,6 +98,9 @@ int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported)
{
struct vpmu_struct *vpmu = vcpu_vpmu(current);
+ if ( vpmu_mode == XENPMU_MODE_OFF )
+ return 0;
+
if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
return 0;
@@ -96,6 +110,12 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
{
struct vpmu_struct *vpmu = vcpu_vpmu(current);
+ if ( vpmu_mode == XENPMU_MODE_OFF )
+ {
+ *msr_content = 0;
+ return 0;
+ }
+
if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
return 0;
@@ -246,28 +266,45 @@ void vpmu_initialise(struct vcpu *v)
ASSERT(!vpmu->flags && !vpmu->context);
+ /*
+ * Count active VPMUs so that we won't try to change vpmu_mode while
+ * they are in use.
+ */
+ spin_lock(&vpmu_lock);
+ vpmu_count++;
+ spin_unlock(&vpmu_lock);
+
switch ( vendor )
{
case X86_VENDOR_AMD:
- ret = svm_vpmu_initialise(v, opt_vpmu_enabled);
+ ret = svm_vpmu_initialise(v);
break;
case X86_VENDOR_INTEL:
- ret = vmx_vpmu_initialise(v, opt_vpmu_enabled);
+ ret = vmx_vpmu_initialise(v);
break;
default:
- if ( opt_vpmu_enabled )
+ if ( vpmu_mode != XENPMU_MODE_OFF )
{
printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
"Disabling VPMU\n", vendor);
opt_vpmu_enabled = 0;
+ vpmu_mode = XENPMU_MODE_OFF;
}
- return;
+ return; /* Don't bother restoring vpmu_count, VPMU is off forever */
}
if ( ret )
printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);
+
+ /* Intel needs to initialize VPMU ops even if VPMU is not in use */
+ if ( ret || (vpmu_mode == XENPMU_MODE_OFF) )
+ {
+ spin_lock(&vpmu_lock);
+ vpmu_count--;
+ spin_unlock(&vpmu_lock);
+ }
}
static void vpmu_clear_last(void *arg)
@@ -296,6 +333,10 @@ void vpmu_destroy(struct vcpu *v)
if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy )
vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
+
+ spin_lock(&vpmu_lock);
+ vpmu_count--;
+ spin_unlock(&vpmu_lock);
}
/* Dump some vpmu informations on console. Used in keyhandler dump_domains(). */
@@ -307,6 +348,109 @@ void vpmu_dump(struct vcpu *v)
vpmu->arch_vpmu_ops->arch_vpmu_dump(v);
}
+long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
+{
+ int ret;
+ struct xen_pmu_params pmu_params = {.val = 0};
+
+ if ( !opt_vpmu_enabled )
+ return -EOPNOTSUPP;
+
+ ret = xsm_pmu_op(XSM_OTHER, current->domain, op);
+ if ( ret )
+ return ret;
+
+ /* Check major version when parameters are specified */
+ switch ( op )
+ {
+ case XENPMU_mode_set:
+ case XENPMU_feature_set:
+ if ( copy_from_guest(&pmu_params, arg, 1) )
+ return -EFAULT;
+
+ if ( pmu_params.version.maj != XENPMU_VER_MAJ )
+ return -EINVAL;
+ }
+
+ switch ( op )
+ {
+ case XENPMU_mode_set:
+ {
+ if ( (pmu_params.val & ~(XENPMU_MODE_SELF | XENPMU_MODE_HV)) ||
+ (hweight64(pmu_params.val) > 1) )
+ return -EINVAL;
+
+ /* 32-bit dom0 can only sample itself. */
+ if ( is_pv_32bit_vcpu(current) && (pmu_params.val & XENPMU_MODE_HV) )
+ return -EINVAL;
+
+ spin_lock(&vpmu_lock);
+
+ /*
+ * We can always safely switch between XENPMU_MODE_SELF and
+ * XENPMU_MODE_HV while other VPMUs are active.
+ */
+ if ( (vpmu_count == 0) || (vpmu_mode == pmu_params.val) ||
+ ((vpmu_mode ^ pmu_params.val) ==
+ (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
+ vpmu_mode = pmu_params.val;
+ else
+ {
+ printk(XENLOG_WARNING "VPMU: Cannot change mode while"
+ " active VPMUs exist\n");
+ ret = -EBUSY;
+ }
+
+ spin_unlock(&vpmu_lock);
+
+ break;
+ }
+
+ case XENPMU_mode_get:
+ memset(&pmu_params, 0, sizeof(pmu_params));
+ pmu_params.val = vpmu_mode;
+
+ pmu_params.version.maj = XENPMU_VER_MAJ;
+ pmu_params.version.min = XENPMU_VER_MIN;
+
+ if ( copy_to_guest(arg, &pmu_params, 1) )
+ return -EFAULT;
+
+ break;
+
+ case XENPMU_feature_set:
+ if ( pmu_params.val & ~XENPMU_FEATURE_INTEL_BTS )
+ return -EINVAL;
+
+ spin_lock(&vpmu_lock);
+
+ if ( vpmu_count == 0 )
+ vpmu_features = pmu_params.val;
+ else
+ {
+ printk(XENLOG_WARNING "VPMU: Cannot change features while"
+ " active VPMUs exist\n");
+ ret = -EBUSY;
+ }
+
+ spin_unlock(&vpmu_lock);
+
+ break;
+
+ case XENPMU_feature_get:
+ pmu_params.val = vpmu_features;
+ if ( copy_field_to_guest(arg, &pmu_params, val) )
+ return -EFAULT;
+
+ break;
+
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
static int __init vpmu_init(void)
{
/* NMI watchdog uses LVTPC and HW counter */
@@ -314,6 +458,7 @@ static int __init vpmu_init(void)
{
printk(XENLOG_WARNING "NMI watchdog is enabled. Turning VPMU off.\n");
opt_vpmu_enabled = 0;
+ vpmu_mode = XENPMU_MODE_OFF;
}
return 0;
diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index 46f340b..1521779 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -430,6 +430,8 @@ ENTRY(compat_hypercall_table)
.quad do_domctl
.quad compat_kexec_op
.quad do_tmem_op
+ .quad do_ni_hypercall /* reserved for XenClient */
+ .quad do_xenpmu_op /* 40 */
.rept __HYPERVISOR_arch_0-((.-compat_hypercall_table)/8)
.quad compat_ni_hypercall
.endr
@@ -479,6 +481,8 @@ ENTRY(compat_hypercall_args_table)
.byte 1 /* do_domctl */
.byte 2 /* compat_kexec_op */
.byte 1 /* do_tmem_op */
+ .byte 0 /* reserved for XenClient */
+ .byte 2 /* do_xenpmu_op */ /* 40 */
.rept __HYPERVISOR_arch_0-(.-compat_hypercall_args_table)
.byte 0 /* compat_ni_hypercall */
.endr
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 4a37642..74677a2 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -763,6 +763,8 @@ ENTRY(hypercall_table)
.quad do_domctl
.quad do_kexec_op
.quad do_tmem_op
+ .quad do_ni_hypercall /* reserved for XenClient */
+ .quad do_xenpmu_op /* 40 */
.rept __HYPERVISOR_arch_0-((.-hypercall_table)/8)
.quad do_ni_hypercall
.endr
@@ -812,6 +814,8 @@ ENTRY(hypercall_args_table)
.byte 1 /* do_domctl */
.byte 2 /* do_kexec */
.byte 1 /* do_tmem_op */
+ .byte 0 /* reserved for XenClient */
+ .byte 2 /* do_xenpmu_op */ /* 40 */
.rept __HYPERVISOR_arch_0-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index 82bfa0e..88ffc19 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -24,13 +24,6 @@
#include <public/pmu.h>
-/*
- * Flag bits given as a string on the hypervisor boot parameter 'vpmu'.
- * See arch/x86/hvm/vpmu.c.
- */
-#define VPMU_BOOT_ENABLED 0x1 /* vpmu generally enabled. */
-#define VPMU_BOOT_BTS 0x2 /* Intel BTS feature wanted. */
-
#define vcpu_vpmu(vcpu) (&(vcpu)->arch.vpmu)
#define vpmu_vcpu(vpmu) container_of((vpmu), struct vcpu, arch.vpmu)
@@ -59,8 +52,8 @@ struct arch_vpmu_ops {
void (*arch_vpmu_dump)(const struct vcpu *);
};
-int vmx_vpmu_initialise(struct vcpu *, unsigned int flags);
-int svm_vpmu_initialise(struct vcpu *, unsigned int flags);
+int vmx_vpmu_initialise(struct vcpu *);
+int svm_vpmu_initialise(struct vcpu *);
struct vpmu_struct {
u32 flags;
@@ -116,5 +109,21 @@ void vpmu_dump(struct vcpu *v);
extern int acquire_pmu_ownership(int pmu_ownership);
extern void release_pmu_ownership(int pmu_ownership);
+extern unsigned int vpmu_mode;
+extern unsigned int vpmu_features;
+
+/* Context switch */
+static inline void vpmu_switch_from(struct vcpu *prev)
+{
+ if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
+ vpmu_save(prev);
+}
+
+static inline void vpmu_switch_to(struct vcpu *next)
+{
+ if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
+ vpmu_load(next);
+}
+
#endif /* __ASM_X86_HVM_VPMU_H_*/
diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
index 844159e..50cc048 100644
--- a/xen/include/public/pmu.h
+++ b/xen/include/public/pmu.h
@@ -14,6 +14,52 @@
#define XENPMU_VER_MIN 1
/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_xenpmu_op(enum xenpmu_op cmd, struct xen_pmu_params *args);
+ *
+ * @cmd == XENPMU_* (PMU operation)
+ * @args == struct xen_pmu_params
+ */
+/* ` enum xenpmu_op { */
+#define XENPMU_mode_get 0 /* Also used for getting PMU version */
+#define XENPMU_mode_set 1
+#define XENPMU_feature_get 2
+#define XENPMU_feature_set 3
+/* ` } */
+
+/* Parameters structure for HYPERVISOR_xenpmu_op call */
+struct xen_pmu_params {
+ /* IN/OUT parameters */
+ struct {
+ uint32_t maj;
+ uint32_t min;
+ } version;
+ uint64_t val;
+
+ /* IN parameters */
+ uint32_t vcpu;
+ uint32_t pad;
+};
+typedef struct xen_pmu_params xen_pmu_params_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pmu_params_t);
+
+/* PMU modes:
+ * - XENPMU_MODE_OFF: No PMU virtualization
+ * - XENPMU_MODE_SELF: Guests can profile themselves
+ * - XENPMU_MODE_HV: Guests can profile themselves, dom0 profiles
+ * itself and Xen
+ */
+#define XENPMU_MODE_OFF 0
+#define XENPMU_MODE_SELF (1<<0)
+#define XENPMU_MODE_HV (1<<1)
+
+/*
+ * PMU features:
+ * - XENPMU_FEATURE_INTEL_BTS: Intel BTS support (ignored on AMD)
+ */
+#define XENPMU_FEATURE_INTEL_BTS 1
+
+/*
* Shared PMU data between hypervisor and PV(H) domains.
*
* The hypervisor fills out this structure during PMU interrupt and sends an
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index 17ecb94..59fb544 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -101,6 +101,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
#define __HYPERVISOR_kexec_op 37
#define __HYPERVISOR_tmem_op 38
#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */
+#define __HYPERVISOR_xenpmu_op 40
/* Architecture-specific hypercall definitions. */
#define __HYPERVISOR_arch_0 48
diff --git a/xen/include/xen/hypercall.h b/xen/include/xen/hypercall.h
index eda8a36..ef665db 100644
--- a/xen/include/xen/hypercall.h
+++ b/xen/include/xen/hypercall.h
@@ -14,6 +14,7 @@
#include <public/event_channel.h>
#include <public/tmem.h>
#include <public/version.h>
+#include <public/pmu.h>
#include <asm/hypercall.h>
#include <xsm/xsm.h>
@@ -144,6 +145,9 @@ do_tmem_op(
extern long
do_xenoprof_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg);
+extern long
+do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg);
+
#ifdef CONFIG_COMPAT
extern int
diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
index 997d2db..7fdd0ea 100644
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -99,6 +99,7 @@
? xenpf_pcpu_version platform.h
? xenpf_resource_entry platform.h
? pmu_data pmu.h
+? pmu_params pmu.h
! sched_poll sched.h
? sched_remote_shutdown sched.h
? sched_shutdown sched.h
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index f044c0f..a496116 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -694,4 +694,19 @@ static XSM_INLINE int xsm_ioport_mapping(XSM_DEFAULT_ARG struct domain *d, uint3
return xsm_default_action(action, current->domain, d);
}
+static XSM_INLINE int xsm_pmu_op (XSM_DEFAULT_ARG struct domain *d, int op)
+{
+ XSM_ASSERT_ACTION(XSM_OTHER);
+ switch ( op )
+ {
+ case XENPMU_mode_set:
+ case XENPMU_mode_get:
+ case XENPMU_feature_set:
+ case XENPMU_feature_get:
+ return xsm_default_action(XSM_PRIV, d, current->domain);
+ default:
+ return -EPERM;
+ }
+}
+
#endif /* CONFIG_X86 */
diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h
index c872d44..4e555d5 100644
--- a/xen/include/xsm/xsm.h
+++ b/xen/include/xsm/xsm.h
@@ -188,6 +188,7 @@ struct xsm_operations {
int (*priv_mapping) (struct domain *d, struct domain *t);
int (*ioport_permission) (struct domain *d, uint32_t s, uint32_t e, uint8_t allow);
int (*ioport_mapping) (struct domain *d, uint32_t s, uint32_t e, uint8_t allow);
+ int (*pmu_op) (struct domain *d, unsigned int op);
#endif
};
@@ -715,6 +716,11 @@ static inline int xsm_ioport_mapping (xsm_default_t def, struct domain *d, uint3
return xsm_ops->ioport_mapping(d, s, e, allow);
}
+static inline int xsm_pmu_op (xsm_default_t def, struct domain *d, int op)
+{
+ return xsm_ops->pmu_op(d, op);
+}
+
#endif /* CONFIG_X86 */
#endif /* XSM_NO_WRAPPERS */
diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c
index e84b0e4..06ac911 100644
--- a/xen/xsm/dummy.c
+++ b/xen/xsm/dummy.c
@@ -159,5 +159,6 @@ void xsm_fixup_ops (struct xsm_operations *ops)
set_to_dummy_if_null(ops, priv_mapping);
set_to_dummy_if_null(ops, ioport_permission);
set_to_dummy_if_null(ops, ioport_mapping);
+ set_to_dummy_if_null(ops, pmu_op);
#endif
}
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index b4aae27..ac98966 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1579,6 +1579,23 @@ static int flask_priv_mapping(struct domain *d, struct domain *t)
{
return domain_has_perm(d, t, SECCLASS_MMU, MMU__TARGET_HACK);
}
+
+static int flask_pmu_op (struct domain *d, unsigned int op)
+{
+ u32 dsid = domain_sid(d);
+
+ switch ( op )
+ {
+ case XENPMU_mode_set:
+ case XENPMU_mode_get:
+ case XENPMU_feature_set:
+ case XENPMU_feature_get:
+ return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_XEN2,
+ XEN2__PMU_CTRL, NULL);
+ default:
+ return -EPERM;
+ }
+}
#endif /* CONFIG_X86 */
long do_flask_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_flask_op);
@@ -1716,6 +1733,7 @@ static struct xsm_operations flask_ops = {
.priv_mapping = flask_priv_mapping,
.ioport_permission = flask_ioport_permission,
.ioport_mapping = flask_ioport_mapping,
+ .pmu_op = flask_pmu_op,
#endif
};
diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
index b35a150..3a5d798 100644
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -87,6 +87,8 @@ class xen2
psr_cmt_op
# XENPF_get_symbol
get_symbol
+# PMU control
+ pmu_ctrl
}
# Classes domain and domain2 consist of operations that a domain performs on
--
1.8.1.4
* Re: [PATCH v24 04/15] x86/VPMU: Interface for setting PMU mode and flags
2015-06-10 15:04 ` [PATCH v24 04/15] x86/VPMU: Interface for setting PMU mode and flags Boris Ostrovsky
@ 2015-06-11 8:17 ` Tian, Kevin
2015-06-11 14:54 ` Boris Ostrovsky
0 siblings, 1 reply; 36+ messages in thread
From: Tian, Kevin @ 2015-06-11 8:17 UTC (permalink / raw)
To: Boris Ostrovsky, JBeulich, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: tim, Nakajima, Jun, xen-devel
> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
> Sent: Wednesday, June 10, 2015 11:04 PM
>
> Add runtime interface for setting PMU mode and flags. Three main modes are
> provided:
> * XENPMU_MODE_OFF: PMU is not virtualized
> * XENPMU_MODE_SELF: Guests can access PMU MSRs and receive PMU interrupts.
> * XENPMU_MODE_HV: Same as XENPMU_MODE_SELF for non-privileged guests, dom0
> can profile itself and the hypervisor.
>
> Note that PMU modes are different from what can be provided at Xen's boot line
> with 'vpmu' argument. An 'off' (or '0') value is equivalent to XENPMU_MODE_OFF.
> Any other value, on the other hand, will cause VPMU mode to be set to
> XENPMU_MODE_SELF during boot.
>
> For feature flags only Intel's BTS is currently supported.
>
> Mode and flags are set via HYPERVISOR_xenpmu_op hypercall.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> Acked-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
> Acked-by: Jan Beulich <jbeulich@suse.com>
> Reviewed-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
> ---
> tools/flask/policy/policy/modules/xen/xen.te | 3 +
> xen/arch/x86/domain.c | 4 +-
> xen/arch/x86/hvm/svm/vpmu.c | 4 +-
> xen/arch/x86/hvm/vmx/vpmu_core2.c | 10 +-
> xen/arch/x86/hvm/vpmu.c | 159 +++++++++++++++++++++++++--
> xen/arch/x86/x86_64/compat/entry.S | 4 +
> xen/arch/x86/x86_64/entry.S | 4 +
> xen/include/asm-x86/hvm/vpmu.h | 27 +++--
> xen/include/public/pmu.h | 46 ++++++++
> xen/include/public/xen.h | 1 +
> xen/include/xen/hypercall.h | 4 +
> xen/include/xlat.lst | 1 +
> xen/include/xsm/dummy.h | 15 +++
> xen/include/xsm/xsm.h | 6 +
> xen/xsm/dummy.c | 1 +
> xen/xsm/flask/hooks.c | 18 +++
> xen/xsm/flask/policy/access_vectors | 2 +
> 17 files changed, 284 insertions(+), 25 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
> index d829d68..5cfb2f0 100644
> --- a/xen/arch/x86/hvm/vpmu.c
> +++ b/xen/arch/x86/hvm/vpmu.c
> @@ -246,28 +266,45 @@ void vpmu_initialise(struct vcpu *v)
>
> ASSERT(!vpmu->flags && !vpmu->context);
>
> + /*
> + * Count active VPMUs so that we won't try to change vpmu_mode while
> + * they are in use.
> + */
> + spin_lock(&vpmu_lock);
> + vpmu_count++;
> + spin_unlock(&vpmu_lock);
> +
> switch ( vendor )
> {
> case X86_VENDOR_AMD:
> - ret = svm_vpmu_initialise(v, opt_vpmu_enabled);
> + ret = svm_vpmu_initialise(v);
> break;
>
> case X86_VENDOR_INTEL:
> - ret = vmx_vpmu_initialise(v, opt_vpmu_enabled);
> + ret = vmx_vpmu_initialise(v);
> break;
>
> default:
> - if ( opt_vpmu_enabled )
> + if ( vpmu_mode != XENPMU_MODE_OFF )
> {
> printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
> "Disabling VPMU\n", vendor);
> opt_vpmu_enabled = 0;
> + vpmu_mode = XENPMU_MODE_OFF;
> }
> - return;
> + return; /* Don't bother restoring vpmu_count, VPMU is off forever */
why not restore vpmu_count here? There could be a race condition
involving the mode control path:
+ if ( (vpmu_count == 0) || (vpmu_mode == pmu_params.val) ||
+ ((vpmu_mode ^ pmu_params.val) ==
+ (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
+ vpmu_mode = pmu_params.val;
It's possible "vpmu_mode=pmu_params.val" happens later than
"vpmu_mode = XENPMU_MODE_OFF"...
It might not be a big problem since opt_vpmu_enabled is 0 then, but
then it's pointless to reset vpmu_mode further if the behavior
is not guaranteed.
> }
>
> if ( ret )
> printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);
> +
> + /* Intel needs to initialize VPMU ops even if VPMU is not in use */
> + if ( ret || (vpmu_mode == XENPMU_MODE_OFF) )
> + {
> + spin_lock(&vpmu_lock);
> + vpmu_count--;
> + spin_unlock(&vpmu_lock);
> + }
> }
>
> static void vpmu_clear_last(void *arg)
> @@ -296,6 +333,10 @@ void vpmu_destroy(struct vcpu *v)
>
> if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy )
> vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
> +
> + spin_lock(&vpmu_lock);
> + vpmu_count--;
> + spin_unlock(&vpmu_lock);
> }
>
> /* Dump some vpmu informations on console. Used in keyhandler dump_domains(). */
> @@ -307,6 +348,109 @@ void vpmu_dump(struct vcpu *v)
> vpmu->arch_vpmu_ops->arch_vpmu_dump(v);
> }
>
> +long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
> +{
> + int ret;
> + struct xen_pmu_params pmu_params = {.val = 0};
> +
> + if ( !opt_vpmu_enabled )
> + return -EOPNOTSUPP;
> +
> + ret = xsm_pmu_op(XSM_OTHER, current->domain, op);
> + if ( ret )
> + return ret;
> +
> + /* Check major version when parameters are specified */
> + switch ( op )
> + {
> + case XENPMU_mode_set:
> + case XENPMU_feature_set:
> + if ( copy_from_guest(&pmu_params, arg, 1) )
> + return -EFAULT;
> +
> + if ( pmu_params.version.maj != XENPMU_VER_MAJ )
> + return -EINVAL;
> + }
> +
> + switch ( op )
> + {
> + case XENPMU_mode_set:
> + {
> + if ( (pmu_params.val & ~(XENPMU_MODE_SELF | XENPMU_MODE_HV)) ||
> + (hweight64(pmu_params.val) > 1) )
> + return -EINVAL;
> +
> + /* 32-bit dom0 can only sample itself. */
> + if ( is_pv_32bit_vcpu(current) && (pmu_params.val & XENPMU_MODE_HV) )
> + return -EINVAL;
> +
> + spin_lock(&vpmu_lock);
> +
> + /*
> + * We can always safely switch between XENPMU_MODE_SELF and
> + * XENPMU_MODE_HV while other VPMUs are active.
> + */
> + if ( (vpmu_count == 0) || (vpmu_mode == pmu_params.val) ||
> + ((vpmu_mode ^ pmu_params.val) ==
> + (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
> + vpmu_mode = pmu_params.val;
> + else
> + {
> + printk(XENLOG_WARNING "VPMU: Cannot change mode while"
> + " active VPMUs exist\n");
> + ret = -EBUSY;
> + }
> +
> + spin_unlock(&vpmu_lock);
> +
> + break;
> + }
> +
> + case XENPMU_mode_get:
> + memset(&pmu_params, 0, sizeof(pmu_params));
> + pmu_params.val = vpmu_mode;
> +
> + pmu_params.version.maj = XENPMU_VER_MAJ;
> + pmu_params.version.min = XENPMU_VER_MIN;
> +
> + if ( copy_to_guest(arg, &pmu_params, 1) )
> + return -EFAULT;
> +
> + break;
> +
> + case XENPMU_feature_set:
> + if ( pmu_params.val & ~XENPMU_FEATURE_INTEL_BTS )
> + return -EINVAL;
> +
> + spin_lock(&vpmu_lock);
> +
> + if ( vpmu_count == 0 )
> + vpmu_features = pmu_params.val;
> + else
> + {
> + printk(XENLOG_WARNING "VPMU: Cannot change features while"
> + " active VPMUs exist\n");
> + ret = -EBUSY;
> + }
What about setting the same features as already in vpmu_features?
We should do the same check as is done for mode setting.
Thanks
Kevin
* Re: [PATCH v24 04/15] x86/VPMU: Interface for setting PMU mode and flags
2015-06-11 8:17 ` Tian, Kevin
@ 2015-06-11 14:54 ` Boris Ostrovsky
2015-06-11 15:04 ` Jan Beulich
2015-06-12 3:23 ` Tian, Kevin
0 siblings, 2 replies; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-11 14:54 UTC (permalink / raw)
To: Tian, Kevin, JBeulich, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: tim, Nakajima, Jun, xen-devel
On 06/11/2015 04:17 AM, Tian, Kevin wrote:
>> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
>> switch ( vendor )
>> {
>> case X86_VENDOR_AMD:
>> - ret = svm_vpmu_initialise(v, opt_vpmu_enabled);
>> + ret = svm_vpmu_initialise(v);
>> break;
>>
>> case X86_VENDOR_INTEL:
>> - ret = vmx_vpmu_initialise(v, opt_vpmu_enabled);
>> + ret = vmx_vpmu_initialise(v);
>> break;
>>
>> default:
>> - if ( opt_vpmu_enabled )
>> + if ( vpmu_mode != XENPMU_MODE_OFF )
>> {
>> printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
>> "Disabling VPMU\n", vendor);
>> opt_vpmu_enabled = 0;
>> + vpmu_mode = XENPMU_MODE_OFF;
>> }
>> - return;
>> + return; /* Don't bother restoring vpmu_count, VPMU is off forever */
>
> why not restore vpmu_count here? There could be a race condition
> involving the mode control path:
>
> + if ( (vpmu_count == 0) || (vpmu_mode == pmu_params.val) ||
> + ((vpmu_mode ^ pmu_params.val) ==
> + (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
> + vpmu_mode = pmu_params.val;
>
> It's possible "vpmu_mode=pmu_params.val" happens later than
> "vpmu_mode = XENPMU_MODE_OFF"...
>
> It might not be a big problem since opt_vpmu_enabled is 0 then, but
> then it's pointless to reset vpmu_mode further if the behavior
> is not guaranteed.
It is somewhat pointless to reset it, but mostly because if we ever get
into the default case above we have much bigger problems than VPMU: the
only way that I can see when this can happen (vendor not being AMD or
Intel) is that current_cpu_data.x86_vendor got overwritten by something
else, which means memory corruption. (I in fact wondered whether I
should just stick a BUG() here).
BTW, even if I decremented vpmu_count above and took vpmu_lock to avoid
the race, this would still not be enough to avoid VPMU-related
inconsistencies: we would really need to make sure that no VCPU in the
system (i.e. for all guests) is in the middle of a VPMU operation. And
that would be somewhat non-trivial (short of pausing all guests, but
then we'd still need to deal with dom0).
That's basically the reason for the "Don't bother" comment. Getting this
completely right is way too much effort for no benefit (AFAICS).
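For reference, the minimal variant (a sketch only, not something I am
proposing to commit) would be to drop the count under the lock before
bailing out:

    default:
        if ( vpmu_mode != XENPMU_MODE_OFF )
        {
            printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
                   "Disabling VPMU\n", vendor);
            opt_vpmu_enabled = 0;
            vpmu_mode = XENPMU_MODE_OFF;
        }
        spin_lock(&vpmu_lock);
        vpmu_count--;
        spin_unlock(&vpmu_lock);
        return;

and even that would still leave the mode-set window you describe open.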
>> +
>> + case XENPMU_feature_set:
>> + if ( pmu_params.val & ~XENPMU_FEATURE_INTEL_BTS )
>> + return -EINVAL;
>> +
>> + spin_lock(&vpmu_lock);
>> +
>> + if ( vpmu_count == 0 )
>> + vpmu_features = pmu_params.val;
>> + else
>> + {
>> + printk(XENLOG_WARNING "VPMU: Cannot change features while"
>> + " active VPMUs exist\n");
>> + ret = -EBUSY;
>> + }
>
> What about setting the same features as already in vpmu_features?
> We should do the same check as is done for mode setting.
Not sure I follow you. There is only one feature currently that we
support --- BTS. And trying to set any other feature will result in
-EINVAL. What is wrong with trying to set the same bit twice? (except
for being pointless)
-boris
* Re: [PATCH v24 04/15] x86/VPMU: Interface for setting PMU mode and flags
2015-06-11 14:54 ` Boris Ostrovsky
@ 2015-06-11 15:04 ` Jan Beulich
2015-06-11 15:14 ` Boris Ostrovsky
2015-06-12 3:23 ` Tian, Kevin
1 sibling, 1 reply; 36+ messages in thread
From: Jan Beulich @ 2015-06-11 15:04 UTC (permalink / raw)
To: Boris Ostrovsky
Cc: Kevin Tian, suravee.suthikulpanit, andrew.cooper3, tim,
dietmar.hahn, xen-devel, Aravind.Gopalakrishnan, Jun Nakajima,
dgdegra
>>> On 11.06.15 at 16:54, <boris.ostrovsky@oracle.com> wrote:
> On 06/11/2015 04:17 AM, Tian, Kevin wrote:
>>> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
>
>>> switch ( vendor )
>>> {
>>> case X86_VENDOR_AMD:
>>> - ret = svm_vpmu_initialise(v, opt_vpmu_enabled);
>>> + ret = svm_vpmu_initialise(v);
>>> break;
>>>
>>> case X86_VENDOR_INTEL:
>>> - ret = vmx_vpmu_initialise(v, opt_vpmu_enabled);
>>> + ret = vmx_vpmu_initialise(v);
>>> break;
>>>
>>> default:
>>> - if ( opt_vpmu_enabled )
>>> + if ( vpmu_mode != XENPMU_MODE_OFF )
>>> {
>>> printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
>>> "Disabling VPMU\n", vendor);
>>> opt_vpmu_enabled = 0;
>>> + vpmu_mode = XENPMU_MODE_OFF;
>>> }
>>> - return;
>>> + return; /* Don't bother restoring vpmu_count, VPMU is off forever */
>>
>> why not restore vpmu_count here? There could be a race condition
>> involving the mode control path:
>>
>> + if ( (vpmu_count == 0) || (vpmu_mode == pmu_params.val) ||
>> + ((vpmu_mode ^ pmu_params.val) ==
>> + (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
>> + vpmu_mode = pmu_params.val;
>>
>> It's possible "vpmu_mode=pmu_params.val" happens later than
>> "vpmu_mode = XENPMU_MODE_OFF"...
>>
>> It might not be a big problem since opt_vpmu_enabled is 0 then, but
>> then it's pointless to reset vpmu_mode further if the behavior
>> is not guaranteed.
>
>
> It is somewhat pointless to reset it, but mostly because if we ever get
> into the default case above we have much bigger problems than VPMU: the
> only way that I can see when this can happen (vendor not being AMD or
> Intel) is that current_cpu_data.x86_vendor got overwritten by something
> else, which means memory corruption.
Or you're running on a Cyrix CPU (unless there's code earlier on
preventing that switch() from being reached).
Jan
* Re: [PATCH v24 04/15] x86/VPMU: Interface for setting PMU mode and flags
2015-06-11 15:04 ` Jan Beulich
@ 2015-06-11 15:14 ` Boris Ostrovsky
2015-06-11 16:09 ` Jan Beulich
0 siblings, 1 reply; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-11 15:14 UTC (permalink / raw)
To: Jan Beulich
Cc: Kevin Tian, suravee.suthikulpanit, andrew.cooper3, tim,
dietmar.hahn, xen-devel, Aravind.Gopalakrishnan, Jun Nakajima,
dgdegra
On 06/11/2015 11:04 AM, Jan Beulich wrote:
>>>> On 11.06.15 at 16:54, <boris.ostrovsky@oracle.com> wrote:
>> On 06/11/2015 04:17 AM, Tian, Kevin wrote:
>>>> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
>>>> switch ( vendor )
>>>> {
>>>> case X86_VENDOR_AMD:
>>>> - ret = svm_vpmu_initialise(v, opt_vpmu_enabled);
>>>> + ret = svm_vpmu_initialise(v);
>>>> break;
>>>>
>>>> case X86_VENDOR_INTEL:
>>>> - ret = vmx_vpmu_initialise(v, opt_vpmu_enabled);
>>>> + ret = vmx_vpmu_initialise(v);
>>>> break;
>>>>
>>>> default:
>>>> - if ( opt_vpmu_enabled )
>>>> + if ( vpmu_mode != XENPMU_MODE_OFF )
>>>> {
>>>> printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
>>>> "Disabling VPMU\n", vendor);
>>>> opt_vpmu_enabled = 0;
>>>> + vpmu_mode = XENPMU_MODE_OFF;
>>>> }
>>>> - return;
>>>> + return; /* Don't bother restoring vpmu_count, VPMU is off forever */
>>> why not restore vpmu_count here? There could be a race condition
>>> involving the mode control path:
>>>
>>> + if ( (vpmu_count == 0) || (vpmu_mode == pmu_params.val) ||
>>> + ((vpmu_mode ^ pmu_params.val) ==
>>> + (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
>>> + vpmu_mode = pmu_params.val;
>>>
>>> It's possible "vpmu_mode=pmu_params.val" happens later than
>>> "vpmu_mode = XENPMU_MODE_OFF"...
>>>
>>> It might not be a big problem since opt_vpmu_enabled is 0 then, but
>>> then it's pointless to reset vpmu_mode further if the behavior
>>> is not guaranteed.
>>
>> It is somewhat pointless to reset it, but mostly because if we ever get
>> into the default case above we have much bigger problems than VPMU: the
>> only way that I can see when this can happen (vendor not being AMD or
>> Intel) is that current_cpu_data.x86_vendor got overwritten by something
>> else, which means memory corruption.
> Or you're running on a Cyrix CPU (unless there's code earlier on
> preventing that switch() from being reached).
Can't happen: we are getting into vpmu_initialise() from
* VMX or SVM code --- so it's AMD or Intel.
* pvpmu_init(), which is gated by the 'vpmu_mode == XENPMU_MODE_OFF' check.
And the only way vpmu_mode can be anything but XENPMU_MODE_OFF is if
vpmu_init() set it in the first place. And that routine does check the
boot CPU's vendor.
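(For reference, pvpmu_init() in patch 6 opens with exactly that gate:

    if ( vpmu_mode == XENPMU_MODE_OFF )
        return -EINVAL;

so a PV vcpu cannot reach vpmu_initialise() unless vpmu_init() already
validated the boot CPU's vendor and turned VPMU on.)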
As a side question, will Xen boot on Cyrix (or are they VIA)?
-boris
* Re: [PATCH v24 04/15] x86/VPMU: Interface for setting PMU mode and flags
2015-06-11 15:14 ` Boris Ostrovsky
@ 2015-06-11 16:09 ` Jan Beulich
0 siblings, 0 replies; 36+ messages in thread
From: Jan Beulich @ 2015-06-11 16:09 UTC (permalink / raw)
To: Boris Ostrovsky
Cc: Kevin Tian, suravee.suthikulpanit, andrew.cooper3, tim,
dietmar.hahn, xen-devel, Aravind.Gopalakrishnan, Jun Nakajima,
dgdegra
>>> On 11.06.15 at 17:14, <boris.ostrovsky@oracle.com> wrote:
> As a side question, will Xen boot on Cyrix (or are they VIA)?
At some point I made that work; the box I have for that has
developed a problem though (turning itself off at random
points in time), so I can't really check anymore whether this
continues to work.
Jan
* Re: [PATCH v24 04/15] x86/VPMU: Interface for setting PMU mode and flags
2015-06-11 14:54 ` Boris Ostrovsky
2015-06-11 15:04 ` Jan Beulich
@ 2015-06-12 3:23 ` Tian, Kevin
2015-06-12 13:58 ` Boris Ostrovsky
1 sibling, 1 reply; 36+ messages in thread
From: Tian, Kevin @ 2015-06-12 3:23 UTC (permalink / raw)
To: Boris Ostrovsky, JBeulich, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: tim, Nakajima, Jun, xen-devel
> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
> Sent: Thursday, June 11, 2015 10:54 PM
>
> On 06/11/2015 04:17 AM, Tian, Kevin wrote:
> >> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
>
> >> switch ( vendor )
> >> {
> >> case X86_VENDOR_AMD:
> >> - ret = svm_vpmu_initialise(v, opt_vpmu_enabled);
> >> + ret = svm_vpmu_initialise(v);
> >> break;
> >>
> >> case X86_VENDOR_INTEL:
> >> - ret = vmx_vpmu_initialise(v, opt_vpmu_enabled);
> >> + ret = vmx_vpmu_initialise(v);
> >> break;
> >>
> >> default:
> >> - if ( opt_vpmu_enabled )
> >> + if ( vpmu_mode != XENPMU_MODE_OFF )
> >> {
> >> printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
> >> "Disabling VPMU\n", vendor);
> >> opt_vpmu_enabled = 0;
> >> + vpmu_mode = XENPMU_MODE_OFF;
> >> }
> >> - return;
> >> + return; /* Don't bother restoring vpmu_count, VPMU is off forever */
> >
> > why not restore vpmu_count here? There could be a race condition
> > involving the mode control path:
> >
> > + if ( (vpmu_count == 0) || (vpmu_mode == pmu_params.val) ||
> > + ((vpmu_mode ^ pmu_params.val) ==
> > + (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
> > + vpmu_mode = pmu_params.val;
> >
> > It's possible "vpmu_mode=pmu_params.val" happens later than
> > "vpmu_mode = XENPMU_MODE_OFF"...
> >
> > It might not be a big problem since opt_vpmu_enabled is 0 then, but
> > then it's pointless to reset vpmu_mode further if the behavior
> > is not guaranteed.
>
>
> It is somewhat pointless to reset it, but mostly because if we ever get
> into the default case above we have much bigger problems than VPMU: the
> only way that I can see when this can happen (vendor not being AMD or
> Intel) is that current_cpu_data.x86_vendor got overwritten by something
> else, which means memory corruption. (I in fact wondered whether I
> should just stick a BUG() here).
>
> BTW, even if I decremented vpmu_count above and took vpmu_lock to avoid
> the race this would still not be enough to avoid VPMU-related
> inconsistencies: we would really need to make sure that no VCPU in the
> system (i.e. for all guests) is in the middle of a VPMU operation. And
> that would be somewhat non-trivial (short of pausing all guests, but
> then we'd still need to deal with dom0).
>
> That's basically the reason for the "Don't bother" comment. Getting this
> completely right is way too much effort for no benefit (AFAICS).
I got this explanation. Thanks.
>
>
> >> +
> >> + case XENPMU_feature_set:
> >> + if ( pmu_params.val & ~XENPMU_FEATURE_INTEL_BTS )
> >> + return -EINVAL;
> >> +
> >> + spin_lock(&vpmu_lock);
> >> +
> >> + if ( vpmu_count == 0 )
> >> + vpmu_features = pmu_params.val;
> >> + else
> >> + {
> >> + printk(XENLOG_WARNING "VPMU: Cannot change features while"
> >> + " active VPMUs exist\n");
> >> + ret = -EBUSY;
> >> + }
> >
> > What about setting the same features as already in vpmu_features?
> > We should do the same check as is done for mode setting.
>
>
> Not sure I follow you. There is only one feature currently that we
> support --- BTS. And trying to set any other feature will result in
> -EINVAL. What is wrong with trying to set the same bit twice? (except
> for being pointless)
>
My point is whether you want to allow setting the same bit twice
when active PMUs exist. From the above code it's disallowed
by the check on vpmu_count. However, in the earlier code handling
vpmu_mode setting, you actually allow setting the same mode
twice:
+ if ( (vpmu_count == 0) || (vpmu_mode == pmu_params.val) ||
+ ((vpmu_mode ^ pmu_params.val) ==
+ (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
+ vpmu_mode = pmu_params.val;
So I thought you might keep the same policy and allow setting
vpmu_features with the same bit twice too.
Thanks
Kevin
* Re: [PATCH v24 04/15] x86/VPMU: Interface for setting PMU mode and flags
2015-06-12 3:23 ` Tian, Kevin
@ 2015-06-12 13:58 ` Boris Ostrovsky
0 siblings, 0 replies; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-12 13:58 UTC (permalink / raw)
To: Tian, Kevin, JBeulich, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: tim, Nakajima, Jun, xen-devel
On 06/11/2015 11:23 PM, Tian, Kevin wrote:
>> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
>
>>
>>
>>>> +
>>>> + case XENPMU_feature_set:
>>>> + if ( pmu_params.val & ~XENPMU_FEATURE_INTEL_BTS )
>>>> + return -EINVAL;
>>>> +
>>>> + spin_lock(&vpmu_lock);
>>>> +
>>>> + if ( vpmu_count == 0 )
>>>> + vpmu_features = pmu_params.val;
>>>> + else
>>>> + {
>>>> + printk(XENLOG_WARNING "VPMU: Cannot change features while"
>>>> + " active VPMUs exist\n");
>>>> + ret = -EBUSY;
>>>> + }
>>>
>>> What about setting the same features as already in vpmu_features?
>>> We should do the same check as is done for mode setting.
>>
>>
>> Not sure I follow you. There is only one feature currently that we
>> support --- BTS. And trying to set any other feature will result in
>> -EINVAL. What is wrong with trying to set the same bit twice? (except
>> for being pointless)
>>
>
> My point is whether you want to allow setting the same bit twice
> when active PMUs exist. From the above code it's disallowed
> by the check on vpmu_count. However, in the earlier code handling
> vpmu_mode setting, you actually allow setting the same mode
> twice:
>
> + if ( (vpmu_count == 0) || (vpmu_mode == pmu_params.val) ||
> + ((vpmu_mode ^ pmu_params.val) ==
> + (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
> + vpmu_mode = pmu_params.val;
>
> So I thought you might keep the same policy and allow setting
> vpmu_features with the same bit twice too.
Oh, I see. You are suggesting changing XENPMU_feature_set to
    if ( (vpmu_count == 0) || (vpmu_features == pmu_params.val) )
        vpmu_features = pmu_params.val;
or dropping '|| (vpmu_mode == pmu_params.val)' from XENPMU_mode_set for
consistency's sake?
I suppose I can do that, yes.
-boris
* [PATCH v24 05/15] x86/VPMU: Initialize VPMUs with __initcall
2015-06-10 15:04 [PATCH v24 00/15] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
` (3 preceding siblings ...)
2015-06-10 15:04 ` [PATCH v24 04/15] x86/VPMU: Interface for setting PMU mode and flags Boris Ostrovsky
@ 2015-06-10 15:04 ` Boris Ostrovsky
2015-06-10 15:04 ` [PATCH v24 06/15] x86/VPMU: Initialize PMU for PV(H) guests Boris Ostrovsky
` (9 subsequent siblings)
14 siblings, 0 replies; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-10 15:04 UTC (permalink / raw)
To: JBeulich, kevin.tian, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: boris.ostrovsky, tim, jun.nakajima, xen-devel
Move some VPMU initialization operations into __initcalls to avoid performing
the same tests and calculations for each vcpu.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/hvm/svm/vpmu.c | 106 ++++++++++++--------------
xen/arch/x86/hvm/vmx/vpmu_core2.c | 151 +++++++++++++++++++-------------------
xen/arch/x86/hvm/vpmu.c | 32 ++++++++
xen/include/asm-x86/hvm/vpmu.h | 2 +
4 files changed, 156 insertions(+), 135 deletions(-)
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 481ea7b..b60ca40 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -356,54 +356,6 @@ static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
return 1;
}
-static int amd_vpmu_initialise(struct vcpu *v)
-{
- struct xen_pmu_amd_ctxt *ctxt;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- uint8_t family = current_cpu_data.x86;
-
- if ( counters == NULL )
- {
- switch ( family )
- {
- case 0x15:
- num_counters = F15H_NUM_COUNTERS;
- counters = AMD_F15H_COUNTERS;
- ctrls = AMD_F15H_CTRLS;
- k7_counters_mirrored = 1;
- break;
- case 0x10:
- case 0x12:
- case 0x14:
- case 0x16:
- default:
- num_counters = F10H_NUM_COUNTERS;
- counters = AMD_F10H_COUNTERS;
- ctrls = AMD_F10H_CTRLS;
- k7_counters_mirrored = 0;
- break;
- }
- }
-
- ctxt = xzalloc_bytes(sizeof(*ctxt) +
- 2 * sizeof(uint64_t) * num_counters);
- if ( !ctxt )
- {
- gdprintk(XENLOG_WARNING, "Insufficient memory for PMU, "
- " PMU feature is unavailable on domain %d vcpu %d.\n",
- v->vcpu_id, v->domain->domain_id);
- return -ENOMEM;
- }
-
- ctxt->counters = sizeof(*ctxt);
- ctxt->ctrls = ctxt->counters + sizeof(uint64_t) * num_counters;
-
- vpmu->context = ctxt;
- vpmu->priv_context = NULL;
- vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
- return 0;
-}
-
static void amd_vpmu_destroy(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
@@ -474,30 +426,62 @@ struct arch_vpmu_ops amd_vpmu_ops = {
int svm_vpmu_initialise(struct vcpu *v)
{
+ struct xen_pmu_amd_ctxt *ctxt;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
- uint8_t family = current_cpu_data.x86;
- int ret = 0;
- /* vpmu enabled? */
if ( vpmu_mode == XENPMU_MODE_OFF )
return 0;
- switch ( family )
+ if ( !counters )
+ return -EINVAL;
+
+ ctxt = xzalloc_bytes(sizeof(*ctxt) +
+ 2 * sizeof(uint64_t) * num_counters);
+ if ( !ctxt )
{
+ printk(XENLOG_G_WARNING "Insufficient memory for PMU, "
+ " PMU feature is unavailable on domain %d vcpu %d.\n",
+ v->vcpu_id, v->domain->domain_id);
+ return -ENOMEM;
+ }
+
+ ctxt->counters = sizeof(*ctxt);
+ ctxt->ctrls = ctxt->counters + sizeof(uint64_t) * num_counters;
+
+ vpmu->context = ctxt;
+ vpmu->priv_context = NULL;
+
+ vpmu->arch_vpmu_ops = &amd_vpmu_ops;
+
+ vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
+ return 0;
+}
+
+int __init amd_vpmu_init(void)
+{
+ switch ( current_cpu_data.x86 )
+ {
+ case 0x15:
+ num_counters = F15H_NUM_COUNTERS;
+ counters = AMD_F15H_COUNTERS;
+ ctrls = AMD_F15H_CTRLS;
+ k7_counters_mirrored = 1;
+ break;
case 0x10:
case 0x12:
case 0x14:
- case 0x15:
case 0x16:
- ret = amd_vpmu_initialise(v);
- if ( !ret )
- vpmu->arch_vpmu_ops = &amd_vpmu_ops;
- return ret;
+ num_counters = F10H_NUM_COUNTERS;
+ counters = AMD_F10H_COUNTERS;
+ ctrls = AMD_F10H_CTRLS;
+ k7_counters_mirrored = 0;
+ break;
+ default:
+ printk(XENLOG_WARNING "VPMU: Unsupported CPU family %#x\n",
+ current_cpu_data.x86);
+ return -EINVAL;
}
- printk("VPMU: Initialization failed. "
- "AMD processor family %d has not "
- "been supported\n", family);
- return -EINVAL;
+ return 0;
}
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index cfcdf42..025c970 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -708,62 +708,6 @@ static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
return 1;
}
-static int core2_vpmu_initialise(struct vcpu *v)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- u64 msr_content;
- static bool_t ds_warned;
-
- if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
- goto func_out;
- /* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */
- while ( boot_cpu_has(X86_FEATURE_DS) )
- {
- if ( !boot_cpu_has(X86_FEATURE_DTES64) )
- {
- if ( !ds_warned )
- printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area"
- " - Debug Store disabled for guests\n");
- break;
- }
- vpmu_set(vpmu, VPMU_CPU_HAS_DS);
- rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
- if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL )
- {
- /* If BTS_UNAVAIL is set reset the DS feature. */
- vpmu_reset(vpmu, VPMU_CPU_HAS_DS);
- if ( !ds_warned )
- printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL"
- " - Debug Store disabled for guests\n");
- break;
- }
-
- vpmu_set(vpmu, VPMU_CPU_HAS_BTS);
- if ( !ds_warned )
- {
- if ( !boot_cpu_has(X86_FEATURE_DSCPL) )
- printk(XENLOG_G_INFO
- "vpmu: CPU doesn't support CPL-Qualified BTS\n");
- printk("******************************************************\n");
- printk("** WARNING: Emulation of BTS Feature is switched on **\n");
- printk("** Using this processor feature in a virtualized **\n");
- printk("** environment is not 100%% safe. **\n");
- printk("** Setting the DS buffer address with wrong values **\n");
- printk("** may lead to hypervisor hangs or crashes. **\n");
- printk("** It is NOT recommended for production use! **\n");
- printk("******************************************************\n");
- }
- break;
- }
- ds_warned = 1;
- func_out:
-
- arch_pmc_cnt = core2_get_arch_pmc_count();
- fixed_pmc_cnt = core2_get_fixed_pmc_count();
- check_pmc_quirk();
- return 0;
-}
-
static void core2_vpmu_destroy(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
@@ -829,23 +773,77 @@ struct arch_vpmu_ops core2_no_vpmu_ops = {
int vmx_vpmu_initialise(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
- uint8_t family = current_cpu_data.x86;
- uint8_t cpu_model = current_cpu_data.x86_model;
- int ret = 0;
+ u64 msr_content;
+ static bool_t ds_warned;
vpmu->arch_vpmu_ops = &core2_no_vpmu_ops;
if ( vpmu_mode == XENPMU_MODE_OFF )
return 0;
- if ( family == 6 )
- {
- u64 caps;
+ if ( (arch_pmc_cnt + fixed_pmc_cnt) == 0 )
+ return -EINVAL;
- rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
- full_width_write = (caps >> 13) & 1;
+ if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
+ goto func_out;
+ /* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */
+ while ( boot_cpu_has(X86_FEATURE_DS) )
+ {
+ if ( !boot_cpu_has(X86_FEATURE_DTES64) )
+ {
+ if ( !ds_warned )
+ printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area"
+ " - Debug Store disabled for guests\n");
+ break;
+ }
+ vpmu_set(vpmu, VPMU_CPU_HAS_DS);
+ rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
+ if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL )
+ {
+ /* If BTS_UNAVAIL is set reset the DS feature. */
+ vpmu_reset(vpmu, VPMU_CPU_HAS_DS);
+ if ( !ds_warned )
+ printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL"
+ " - Debug Store disabled for guests\n");
+ break;
+ }
- switch ( cpu_model )
+ vpmu_set(vpmu, VPMU_CPU_HAS_BTS);
+ if ( !ds_warned )
{
+ if ( !boot_cpu_has(X86_FEATURE_DSCPL) )
+ printk(XENLOG_G_INFO
+ "vpmu: CPU doesn't support CPL-Qualified BTS\n");
+ printk("******************************************************\n");
+ printk("** WARNING: Emulation of BTS Feature is switched on **\n");
+ printk("** Using this processor feature in a virtualized **\n");
+ printk("** environment is not 100%% safe. **\n");
+ printk("** Setting the DS buffer address with wrong values **\n");
+ printk("** may lead to hypervisor hangs or crashes. **\n");
+ printk("** It is NOT recommended for production use! **\n");
+ printk("******************************************************\n");
+ }
+ break;
+ }
+ ds_warned = 1;
+ func_out:
+
+ vpmu->arch_vpmu_ops = &core2_vpmu_ops;
+
+ return 0;
+}
+
+int __init core2_vpmu_init(void)
+{
+ u64 caps;
+
+ if ( current_cpu_data.x86 != 6 )
+ {
+ printk(XENLOG_WARNING "VPMU: only family 6 is supported\n");
+ return -EINVAL;
+ }
+
+ switch ( current_cpu_data.x86_model )
+ {
/* Core2: */
case 0x0f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
case 0x16: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
@@ -884,16 +882,21 @@ int vmx_vpmu_initialise(struct vcpu *v)
/* next gen Xeon Phi */
case 0x57:
- ret = core2_vpmu_initialise(v);
- if ( !ret )
- vpmu->arch_vpmu_ops = &core2_vpmu_ops;
- return ret;
- }
+ break;
+
+ default:
+ printk(XENLOG_WARNING "VPMU: Unsupported CPU model %#x\n",
+ current_cpu_data.x86_model);
+ return -EINVAL;
}
- printk("VPMU: Initialization failed. "
- "Intel processor family %d model %d has not "
- "been supported\n", family, cpu_model);
- return -EINVAL;
+ arch_pmc_cnt = core2_get_arch_pmc_count();
+ fixed_pmc_cnt = core2_get_fixed_pmc_count();
+ rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
+ full_width_write = (caps >> 13) & 1;
+
+ check_pmc_quirk();
+
+ return 0;
}
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index 5cfb2f0..bb0b2e3 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -453,14 +453,46 @@ long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
static int __init vpmu_init(void)
{
+ int vendor = current_cpu_data.x86_vendor;
+
+ if ( !opt_vpmu_enabled )
+ {
+ printk(XENLOG_INFO "VPMU: disabled\n");
+ return 0;
+ }
+
/* NMI watchdog uses LVTPC and HW counter */
if ( opt_watchdog && opt_vpmu_enabled )
{
printk(XENLOG_WARNING "NMI watchdog is enabled. Turning VPMU off.\n");
opt_vpmu_enabled = 0;
vpmu_mode = XENPMU_MODE_OFF;
+ return 0;
+ }
+
+ switch ( vendor )
+ {
+ case X86_VENDOR_AMD:
+ if ( amd_vpmu_init() )
+ vpmu_mode = XENPMU_MODE_OFF;
+ break;
+ case X86_VENDOR_INTEL:
+ if ( core2_vpmu_init() )
+ vpmu_mode = XENPMU_MODE_OFF;
+ break;
+ default:
+ printk(XENLOG_WARNING "VPMU: Unknown CPU vendor: %d. "
+ "Turning VPMU off.\n", vendor);
+ vpmu_mode = XENPMU_MODE_OFF;
+ break;
}
+ if ( vpmu_mode != XENPMU_MODE_OFF )
+ printk(XENLOG_INFO "VPMU: version " __stringify(XENPMU_VER_MAJ) "."
+ __stringify(XENPMU_VER_MIN) "\n");
+ else
+ opt_vpmu_enabled = 0;
+
return 0;
}
__initcall(vpmu_init);
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index 88ffc19..96f7666 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -52,7 +52,9 @@ struct arch_vpmu_ops {
void (*arch_vpmu_dump)(const struct vcpu *);
};
+int core2_vpmu_init(void);
int vmx_vpmu_initialise(struct vcpu *);
+int amd_vpmu_init(void);
int svm_vpmu_initialise(struct vcpu *);
struct vpmu_struct {
--
1.8.1.4
* [PATCH v24 06/15] x86/VPMU: Initialize PMU for PV(H) guests
2015-06-10 15:04 [PATCH v24 00/15] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
` (4 preceding siblings ...)
2015-06-10 15:04 ` [PATCH v24 05/15] x86/VPMU: Initialize VPMUs with __initcall Boris Ostrovsky
@ 2015-06-10 15:04 ` Boris Ostrovsky
2015-06-11 8:21 ` Tian, Kevin
2015-06-10 15:04 ` [PATCH v24 07/15] x86/VPMU: Save VPMU state for PV guests during context switch Boris Ostrovsky
` (8 subsequent siblings)
14 siblings, 1 reply; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-10 15:04 UTC (permalink / raw)
To: JBeulich, kevin.tian, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: boris.ostrovsky, tim, jun.nakajima, xen-devel
Code for initializing/tearing down PMU for PV guests
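For illustration, the expected guest-side sequence (a sketch only: the
xenpmu_op() wrapper and pmu_page_gfn are hypothetical stand-ins for
whatever the guest kernel provides) passes the GFN of a writable shared
page, once per vcpu:

    struct xen_pmu_params xp = {
        .version.maj = XENPMU_VER_MAJ,
        .version.min = XENPMU_VER_MIN,
        .val = pmu_page_gfn,   /* GFN of the page that will hold xen_pmu_data */
        .vcpu = cpu,           /* vcpu this page belongs to */
    };

    rc = xenpmu_op(XENPMU_init, &xp);  /* hypervisor maps page, inits VPMU */
    ...
    xenpmu_op(XENPMU_finish, &xp);     /* teardown: unmap page, destroy VPMU */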
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
Acked-by: Jan Beulich <jbeulich@suse.com>
---
tools/flask/policy/policy/modules/xen/xen.te | 4 +
xen/arch/x86/domain.c | 2 +
xen/arch/x86/hvm/hvm.c | 1 +
xen/arch/x86/hvm/svm/svm.c | 4 +-
xen/arch/x86/hvm/svm/vpmu.c | 16 +++-
xen/arch/x86/hvm/vmx/vmx.c | 4 +-
xen/arch/x86/hvm/vmx/vpmu_core2.c | 30 ++++--
xen/arch/x86/hvm/vpmu.c | 131 ++++++++++++++++++++++++---
xen/common/event_channel.c | 1 +
xen/include/asm-x86/hvm/vpmu.h | 2 +
xen/include/public/pmu.h | 2 +
xen/include/public/xen.h | 1 +
xen/include/xsm/dummy.h | 3 +
xen/xsm/flask/hooks.c | 4 +
xen/xsm/flask/policy/access_vectors | 2 +
15 files changed, 181 insertions(+), 26 deletions(-)
diff --git a/tools/flask/policy/policy/modules/xen/xen.te b/tools/flask/policy/policy/modules/xen/xen.te
index 45b5cb2..f553eb5 100644
--- a/tools/flask/policy/policy/modules/xen/xen.te
+++ b/tools/flask/policy/policy/modules/xen/xen.te
@@ -130,6 +130,10 @@ if (guest_writeconsole) {
dontaudit domain_type xen_t : xen writeconsole;
}
+# Allow all domains to use PMU (but not to change its settings --- that's what
+# pmu_ctrl is for)
+allow domain_type xen_t:xen2 pmu_use;
+
###############################################################################
#
# Domain creation
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 8cc3ae7..8e79aca 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -438,6 +438,8 @@ int vcpu_initialise(struct vcpu *v)
vmce_init_vcpu(v);
}
+ spin_lock_init(&v->arch.vpmu.vpmu_lock);
+
if ( has_hvm_container_domain(d) )
{
rc = hvm_vcpu_initialise(v);
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index f354cb7..016e842 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4914,6 +4914,7 @@ static hvm_hypercall_t *const pvh_hypercall64_table[NR_hypercalls] = {
HYPERCALL(hvm_op),
HYPERCALL(sysctl),
HYPERCALL(domctl),
+ HYPERCALL(xenpmu_op),
[ __HYPERVISOR_arch_1 ] = (hvm_hypercall_t *)paging_domctl_continuation
};
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 6734fb6..49d2ddf 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1165,7 +1165,9 @@ static int svm_vcpu_initialise(struct vcpu *v)
return rc;
}
- vpmu_initialise(v);
+ /* PVH's VPMU is initialized via hypercall */
+ if ( is_hvm_vcpu(v) )
+ vpmu_initialise(v);
svm_guest_osvw_init(v);
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index b60ca40..a8572a6 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -364,13 +364,11 @@ static void amd_vpmu_destroy(struct vcpu *v)
amd_vpmu_unset_msr_bitmap(v);
xfree(vpmu->context);
- vpmu_reset(vpmu, VPMU_CONTEXT_ALLOCATED);
if ( vpmu_is_set(vpmu, VPMU_RUNNING) )
- {
- vpmu_reset(vpmu, VPMU_RUNNING);
release_pmu_ownship(PMU_OWNER_HVM);
- }
+
+ vpmu_clear(vpmu);
}
/* VPMU part of the 'q' keyhandler */
@@ -482,6 +480,16 @@ int __init amd_vpmu_init(void)
return -EINVAL;
}
+ if ( sizeof(struct xen_pmu_data) +
+ 2 * sizeof(uint64_t) * num_counters > PAGE_SIZE )
+ {
+ printk(XENLOG_WARNING
+ "VPMU: Register bank does not fit into VPMU shared page\n");
+ counters = ctrls = NULL;
+ num_counters = 0;
+ return -ENOSPC;
+ }
+
return 0;
}
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index af257db..62d99e1 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -140,7 +140,9 @@ static int vmx_vcpu_initialise(struct vcpu *v)
}
}
- vpmu_initialise(v);
+ /* PVH's VPMU is initialized via hypercall */
+ if ( is_hvm_vcpu(v) )
+ vpmu_initialise(v);
vmx_install_vlapic_mapping(v);
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index 025c970..e7642e5 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -365,13 +365,16 @@ static int core2_vpmu_alloc_resource(struct vcpu *v)
if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
return 0;
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
- if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
- goto out_err;
+ if ( has_hvm_container_vcpu(v) )
+ {
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
+ goto out_err;
- if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
- goto out_err;
- vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
+ goto out_err;
+ vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ }
core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
sizeof(uint64_t) * fixed_pmc_cnt +
@@ -717,7 +720,7 @@ static void core2_vpmu_destroy(struct vcpu *v)
if ( has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
release_pmu_ownship(PMU_OWNER_HVM);
- vpmu_reset(vpmu, VPMU_CONTEXT_ALLOCATED);
+ vpmu_clear(vpmu);
}
struct arch_vpmu_ops core2_vpmu_ops = {
@@ -827,6 +830,10 @@ int vmx_vpmu_initialise(struct vcpu *v)
ds_warned = 1;
func_out:
+ /* PV domains can allocate resources immediately */
+ if ( is_pv_vcpu(v) && !core2_vpmu_alloc_resource(v) )
+ return -EIO;
+
vpmu->arch_vpmu_ops = &core2_vpmu_ops;
return 0;
@@ -897,6 +904,15 @@ int __init core2_vpmu_init(void)
check_pmc_quirk();
+ if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
+ sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt > PAGE_SIZE )
+ {
+ printk(XENLOG_WARNING
+ "VPMU: Register bank does not fit into VPMU share page\n");
+ arch_pmc_cnt = fixed_pmc_cnt = 0;
+ return -ENOSPC;
+ }
+
return 0;
}
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index bb0b2e3..cfaa8c8 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -27,6 +27,7 @@
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/nmi.h>
+#include <asm/p2m.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
@@ -257,22 +258,25 @@ void vpmu_initialise(struct vcpu *v)
struct vpmu_struct *vpmu = vcpu_vpmu(v);
uint8_t vendor = current_cpu_data.x86_vendor;
int ret;
+ bool_t is_priv_vpmu = is_hardware_domain(v->domain);
BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
- if ( is_pvh_vcpu(v) )
- return;
-
ASSERT(!vpmu->flags && !vpmu->context);
- /*
- * Count active VPMUs so that we won't try to change vpmu_mode while
- * they are in use.
- */
- spin_lock(&vpmu_lock);
- vpmu_count++;
- spin_unlock(&vpmu_lock);
+ if ( !is_priv_vpmu )
+ {
+ /*
+ * Count active VPMUs so that we won't try to change vpmu_mode while
+ * they are in use.
+ * vpmu_mode can be safely updated while dom0's VPMUs are active and
+ * so we don't need to include it in the count.
+ */
+ spin_lock(&vpmu_lock);
+ vpmu_count++;
+ spin_unlock(&vpmu_lock);
+ }
switch ( vendor )
{
@@ -299,7 +303,7 @@ void vpmu_initialise(struct vcpu *v)
printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);
/* Intel needs to initialize VPMU ops even if VPMU is not in use */
- if ( ret || (vpmu_mode == XENPMU_MODE_OFF) )
+ if ( !is_priv_vpmu && (ret || (vpmu_mode == XENPMU_MODE_OFF)) )
{
spin_lock(&vpmu_lock);
vpmu_count--;
@@ -332,13 +336,104 @@ void vpmu_destroy(struct vcpu *v)
vpmu_clear_last, v, 1);
if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy )
- vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
+ {
+ /* Unload VPMU first. This will stop counters */
+ on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu),
+ vpmu_save_force, v, 1);
+ vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
+ }
spin_lock(&vpmu_lock);
- vpmu_count--;
+ if ( !is_hardware_domain(v->domain) )
+ vpmu_count--;
spin_unlock(&vpmu_lock);
}
+static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
+{
+ struct vcpu *v;
+ struct vpmu_struct *vpmu;
+ struct page_info *page;
+ uint64_t gfn = params->val;
+
+ if ( vpmu_mode == XENPMU_MODE_OFF )
+ return -EINVAL;
+
+ if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
+ return -EINVAL;
+
+ page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
+ if ( !page )
+ return -EINVAL;
+
+ if ( !get_page_type(page, PGT_writable_page) )
+ {
+ put_page(page);
+ return -EINVAL;
+ }
+
+ v = d->vcpu[params->vcpu];
+ vpmu = vcpu_vpmu(v);
+
+ spin_lock(&vpmu->vpmu_lock);
+
+ if ( v->arch.vpmu.xenpmu_data )
+ {
+ spin_unlock(&vpmu->vpmu_lock);
+ put_page_and_type(page);
+ return -EEXIST;
+ }
+
+ v->arch.vpmu.xenpmu_data = __map_domain_page_global(page);
+ if ( !v->arch.vpmu.xenpmu_data )
+ {
+ spin_unlock(&vpmu->vpmu_lock);
+ put_page_and_type(page);
+ return -ENOMEM;
+ }
+
+ vpmu_initialise(v);
+
+ spin_unlock(&vpmu->vpmu_lock);
+
+ return 0;
+}
+
+static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params)
+{
+ struct vcpu *v;
+ struct vpmu_struct *vpmu;
+ uint64_t mfn;
+ void *xenpmu_data;
+
+ if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
+ return;
+
+ v = d->vcpu[params->vcpu];
+ if ( v != current )
+ vcpu_pause(v);
+
+ vpmu = vcpu_vpmu(v);
+ spin_lock(&vpmu->vpmu_lock);
+
+ vpmu_destroy(v);
+ xenpmu_data = vpmu->xenpmu_data;
+ vpmu->xenpmu_data = NULL;
+
+ spin_unlock(&vpmu->vpmu_lock);
+
+ if ( xenpmu_data )
+ {
+ mfn = domain_page_map_to_mfn(xenpmu_data);
+ ASSERT(mfn_valid(mfn));
+ unmap_domain_page_global(xenpmu_data);
+ put_page_and_type(mfn_to_page(mfn));
+ }
+
+ if ( v != current )
+ vcpu_unpause(v);
+}
+
/* Dump some vpmu informations on console. Used in keyhandler dump_domains(). */
void vpmu_dump(struct vcpu *v)
{
@@ -365,6 +460,8 @@ long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
{
case XENPMU_mode_set:
case XENPMU_feature_set:
+ case XENPMU_init:
+ case XENPMU_finish:
if ( copy_from_guest(&pmu_params, arg, 1) )
return -EFAULT;
@@ -444,6 +541,14 @@ long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
break;
+ case XENPMU_init:
+ ret = pvpmu_init(current->domain, &pmu_params);
+ break;
+
+ case XENPMU_finish:
+ pvpmu_finish(current->domain, &pmu_params);
+ break;
+
default:
ret = -EINVAL;
}
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index fae242d..310f590 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -106,6 +106,7 @@ static int virq_is_global(uint32_t virq)
case VIRQ_TIMER:
case VIRQ_DEBUG:
case VIRQ_XENOPROF:
+ case VIRQ_XENPMU:
rc = 0;
break;
case VIRQ_ARCH_0 ... VIRQ_ARCH_7:
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index 96f7666..642a4b7 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -64,6 +64,8 @@ struct vpmu_struct {
void *context; /* May be shared with PV guest */
void *priv_context; /* hypervisor-only */
struct arch_vpmu_ops *arch_vpmu_ops;
+ struct xen_pmu_data *xenpmu_data;
+ spinlock_t vpmu_lock;
};
/* VPMU states */
diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
index 50cc048..e6307b5 100644
--- a/xen/include/public/pmu.h
+++ b/xen/include/public/pmu.h
@@ -25,6 +25,8 @@
#define XENPMU_mode_set 1
#define XENPMU_feature_get 2
#define XENPMU_feature_set 3
+#define XENPMU_init 4
+#define XENPMU_finish 5
/* ` } */
/* Parameters structure for HYPERVISOR_xenpmu_op call */
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index 59fb544..1b98d8b 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -161,6 +161,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */
#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */
#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */
+#define VIRQ_XENPMU 13 /* V. PMC interrupt */
/* Architecture-specific VIRQ definitions. */
#define VIRQ_ARCH_0 16
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index a496116..6456f72 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -704,6 +704,9 @@ static XSM_INLINE int xsm_pmu_op (XSM_DEFAULT_ARG struct domain *d, int op)
case XENPMU_feature_set:
case XENPMU_feature_get:
return xsm_default_action(XSM_PRIV, d, current->domain);
+ case XENPMU_init:
+ case XENPMU_finish:
+ return xsm_default_action(XSM_HOOK, d, current->domain);
default:
return -EPERM;
}
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index ac98966..aefcbda 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1592,6 +1592,10 @@ static int flask_pmu_op (struct domain *d, unsigned int op)
case XENPMU_feature_get:
return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_XEN2,
XEN2__PMU_CTRL, NULL);
+ case XENPMU_init:
+ case XENPMU_finish:
+ return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_XEN2,
+ XEN2__PMU_USE, NULL);
default:
return -EPERM;
}
diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
index 3a5d798..a6517a3 100644
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -89,6 +89,8 @@ class xen2
get_symbol
# PMU control
pmu_ctrl
+# PMU use (domains, including unprivileged ones, will be using this operation)
+ pmu_use
}
# Classes domain and domain2 consist of operations that a domain performs on
--
1.8.1.4
* Re: [PATCH v24 06/15] x86/VPMU: Initialize PMU for PV(H) guests
2015-06-10 15:04 ` [PATCH v24 06/15] x86/VPMU: Initialize PMU for PV(H) guests Boris Ostrovsky
@ 2015-06-11 8:21 ` Tian, Kevin
0 siblings, 0 replies; 36+ messages in thread
From: Tian, Kevin @ 2015-06-11 8:21 UTC (permalink / raw)
To: Boris Ostrovsky, JBeulich, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: tim, Nakajima, Jun, xen-devel
> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
> Sent: Wednesday, June 10, 2015 11:04 PM
>
> Code for initializing/tearing down PMU for PV guests
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> Acked-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
> Acked-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
> ---
> tools/flask/policy/policy/modules/xen/xen.te | 4 +
> xen/arch/x86/domain.c | 2 +
> xen/arch/x86/hvm/hvm.c | 1 +
> xen/arch/x86/hvm/svm/svm.c | 4 +-
> xen/arch/x86/hvm/svm/vpmu.c | 16 +++-
> xen/arch/x86/hvm/vmx/vmx.c | 4 +-
> xen/arch/x86/hvm/vmx/vpmu_core2.c | 30 ++++--
> xen/arch/x86/hvm/vpmu.c | 131 ++++++++++++++++++++++++---
> xen/common/event_channel.c | 1 +
> xen/include/asm-x86/hvm/vpmu.h | 2 +
> xen/include/public/pmu.h | 2 +
> xen/include/public/xen.h | 1 +
> xen/include/xsm/dummy.h | 3 +
> xen/xsm/flask/hooks.c | 4 +
> xen/xsm/flask/policy/access_vectors | 2 +
> 15 files changed, 181 insertions(+), 26 deletions(-)
>
> diff --git a/tools/flask/policy/policy/modules/xen/xen.te b/tools/flask/policy/policy/modules/xen/xen.te
> index 45b5cb2..f553eb5 100644
> --- a/tools/flask/policy/policy/modules/xen/xen.te
> +++ b/tools/flask/policy/policy/modules/xen/xen.te
> @@ -130,6 +130,10 @@ if (guest_writeconsole) {
> dontaudit domain_type xen_t : xen writeconsole;
> }
>
> +# Allow all domains to use PMU (but not to change its settings --- that's what
> +# pmu_ctrl is for)
> +allow domain_type xen_t:xen2 pmu_use;
> +
>
> ###############################################################################
> #
> # Domain creation
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index 8cc3ae7..8e79aca 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -438,6 +438,8 @@ int vcpu_initialise(struct vcpu *v)
> vmce_init_vcpu(v);
> }
>
> + spin_lock_init(&v->arch.vpmu.vpmu_lock);
> +
> if ( has_hvm_container_domain(d) )
> {
> rc = hvm_vcpu_initialise(v);
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index f354cb7..016e842 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -4914,6 +4914,7 @@ static hvm_hypercall_t *const pvh_hypercall64_table[NR_hypercalls] = {
> HYPERCALL(hvm_op),
> HYPERCALL(sysctl),
> HYPERCALL(domctl),
> + HYPERCALL(xenpmu_op),
> [ __HYPERVISOR_arch_1 ] = (hvm_hypercall_t *)paging_domctl_continuation
> };
>
> diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
> index 6734fb6..49d2ddf 100644
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -1165,7 +1165,9 @@ static int svm_vcpu_initialise(struct vcpu *v)
> return rc;
> }
>
> - vpmu_initialise(v);
> + /* PVH's VPMU is initialized via hypercall */
> + if ( is_hvm_vcpu(v) )
> + vpmu_initialise(v);
>
> svm_guest_osvw_init(v);
>
> diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
> index b60ca40..a8572a6 100644
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -364,13 +364,11 @@ static void amd_vpmu_destroy(struct vcpu *v)
> amd_vpmu_unset_msr_bitmap(v);
>
> xfree(vpmu->context);
> - vpmu_reset(vpmu, VPMU_CONTEXT_ALLOCATED);
>
> if ( vpmu_is_set(vpmu, VPMU_RUNNING) )
> - {
> - vpmu_reset(vpmu, VPMU_RUNNING);
> release_pmu_ownship(PMU_OWNER_HVM);
> - }
> +
> + vpmu_clear(vpmu);
> }
>
> /* VPMU part of the 'q' keyhandler */
> @@ -482,6 +480,16 @@ int __init amd_vpmu_init(void)
> return -EINVAL;
> }
>
> + if ( sizeof(struct xen_pmu_data) +
> + 2 * sizeof(uint64_t) * num_counters > PAGE_SIZE )
> + {
> + printk(XENLOG_WARNING
> + "VPMU: Register bank does not fit into VPMU shared page\n");
> + counters = ctrls = NULL;
> + num_counters = 0;
> + return -ENOSPC;
> + }
> +
> return 0;
> }
>
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index af257db..62d99e1 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -140,7 +140,9 @@ static int vmx_vcpu_initialise(struct vcpu *v)
> }
> }
>
> - vpmu_initialise(v);
> + /* PVH's VPMU is initialized via hypercall */
> + if ( is_hvm_vcpu(v) )
> + vpmu_initialise(v);
>
> vmx_install_vlapic_mapping(v);
>
> diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> index 025c970..e7642e5 100644
> --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
> +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> @@ -365,13 +365,16 @@ static int core2_vpmu_alloc_resource(struct vcpu *v)
> if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
> return 0;
>
> - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
> - if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
> - goto out_err;
> + if ( has_hvm_container_vcpu(v) )
> + {
> + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
> + if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
> + goto out_err;
>
> - if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
> - goto out_err;
> - vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
> + if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
> + goto out_err;
> + vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
> + }
>
> core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
> sizeof(uint64_t) * fixed_pmc_cnt +
> @@ -717,7 +720,7 @@ static void core2_vpmu_destroy(struct vcpu *v)
> if ( has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
> core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
> release_pmu_ownship(PMU_OWNER_HVM);
> - vpmu_reset(vpmu, VPMU_CONTEXT_ALLOCATED);
> + vpmu_clear(vpmu);
> }
>
> struct arch_vpmu_ops core2_vpmu_ops = {
> @@ -827,6 +830,10 @@ int vmx_vpmu_initialise(struct vcpu *v)
> ds_warned = 1;
> func_out:
>
> + /* PV domains can allocate resources immediately */
> + if ( is_pv_vcpu(v) && !core2_vpmu_alloc_resource(v) )
> + return -EIO;
> +
> vpmu->arch_vpmu_ops = &core2_vpmu_ops;
>
> return 0;
> @@ -897,6 +904,15 @@ int __init core2_vpmu_init(void)
>
> check_pmc_quirk();
>
> + if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
> + sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt > PAGE_SIZE )
> + {
> + printk(XENLOG_WARNING
> + "VPMU: Register bank does not fit into VPMU share page\n");
> + arch_pmc_cnt = fixed_pmc_cnt = 0;
> + return -ENOSPC;
> + }
> +
> return 0;
> }
>
> diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
> index bb0b2e3..cfaa8c8 100644
> --- a/xen/arch/x86/hvm/vpmu.c
> +++ b/xen/arch/x86/hvm/vpmu.c
> @@ -27,6 +27,7 @@
> #include <asm/types.h>
> #include <asm/msr.h>
> #include <asm/nmi.h>
> +#include <asm/p2m.h>
> #include <asm/hvm/support.h>
> #include <asm/hvm/vmx/vmx.h>
> #include <asm/hvm/vmx/vmcs.h>
> @@ -257,22 +258,25 @@ void vpmu_initialise(struct vcpu *v)
> struct vpmu_struct *vpmu = vcpu_vpmu(v);
> uint8_t vendor = current_cpu_data.x86_vendor;
> int ret;
> + bool_t is_priv_vpmu = is_hardware_domain(v->domain);
>
> BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
> BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
>
> - if ( is_pvh_vcpu(v) )
> - return;
> -
> ASSERT(!vpmu->flags && !vpmu->context);
>
> - /*
> - * Count active VPMUs so that we won't try to change vpmu_mode while
> - * they are in use.
> - */
> - spin_lock(&vpmu_lock);
> - vpmu_count++;
> - spin_unlock(&vpmu_lock);
> + if ( !is_priv_vpmu )
> + {
> + /*
> + * Count active VPMUs so that we won't try to change vpmu_mode while
> + * they are in use.
> + * vpmu_mode can be safely updated while dom0's VPMUs are active and
> + * so we don't need to include it in the count.
> + */
> + spin_lock(&vpmu_lock);
> + vpmu_count++;
> + spin_unlock(&vpmu_lock);
> + }
>
> switch ( vendor )
> {
> @@ -299,7 +303,7 @@ void vpmu_initialise(struct vcpu *v)
> printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);
>
> /* Intel needs to initialize VPMU ops even if VPMU is not in use */
> - if ( ret || (vpmu_mode == XENPMU_MODE_OFF) )
> + if ( !is_priv_vpmu && (ret || (vpmu_mode == XENPMU_MODE_OFF)) )
> {
> spin_lock(&vpmu_lock);
> vpmu_count--;
> @@ -332,13 +336,104 @@ void vpmu_destroy(struct vcpu *v)
> vpmu_clear_last, v, 1);
>
> if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy )
> - vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
> + {
> + /* Unload VPMU first. This will stop counters */
> + on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu),
> + vpmu_save_force, v, 1);
> + vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
> + }
>
> spin_lock(&vpmu_lock);
> - vpmu_count--;
> + if ( !is_hardware_domain(v->domain) )
> + vpmu_count--;
> spin_unlock(&vpmu_lock);
> }
>
> +static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
> +{
> + struct vcpu *v;
> + struct vpmu_struct *vpmu;
> + struct page_info *page;
> + uint64_t gfn = params->val;
> +
> + if ( vpmu_mode == XENPMU_MODE_OFF )
> + return -EINVAL;
> +
> + if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
> + return -EINVAL;
> +
> + page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
> + if ( !page )
> + return -EINVAL;
> +
> + if ( !get_page_type(page, PGT_writable_page) )
> + {
> + put_page(page);
> + return -EINVAL;
> + }
> +
> + v = d->vcpu[params->vcpu];
> + vpmu = vcpu_vpmu(v);
> +
> + spin_lock(&vpmu->vpmu_lock);
> +
> + if ( v->arch.vpmu.xenpmu_data )
> + {
> + spin_unlock(&vpmu->vpmu_lock);
> + put_page_and_type(page);
> + return -EEXIST;
> + }
> +
> + v->arch.vpmu.xenpmu_data = __map_domain_page_global(page);
> + if ( !v->arch.vpmu.xenpmu_data )
> + {
> + spin_unlock(&vpmu->vpmu_lock);
> + put_page_and_type(page);
> + return -ENOMEM;
> + }
> +
> + vpmu_initialise(v);
> +
> + spin_unlock(&vpmu->vpmu_lock);
> +
> + return 0;
> +}
> +
> +static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params)
> +{
> + struct vcpu *v;
> + struct vpmu_struct *vpmu;
> + uint64_t mfn;
> + void *xenpmu_data;
> +
> + if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
> + return;
> +
> + v = d->vcpu[params->vcpu];
> + if ( v != current )
> + vcpu_pause(v);
> +
> + vpmu = vcpu_vpmu(v);
> + spin_lock(&vpmu->vpmu_lock);
> +
> + vpmu_destroy(v);
> + xenpmu_data = vpmu->xenpmu_data;
> + vpmu->xenpmu_data = NULL;
> +
> + spin_unlock(&vpmu->vpmu_lock);
> +
> + if ( xenpmu_data )
> + {
> + mfn = domain_page_map_to_mfn(xenpmu_data);
> + ASSERT(mfn_valid(mfn));
> + unmap_domain_page_global(xenpmu_data);
> + put_page_and_type(mfn_to_page(mfn));
> + }
> +
> + if ( v != current )
> + vcpu_unpause(v);
> +}
> +
> /* Dump some vpmu informations on console. Used in keyhandler dump_domains(). */
> void vpmu_dump(struct vcpu *v)
> {
> @@ -365,6 +460,8 @@ long do_xenpmu_op(unsigned int op,
> XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
> {
> case XENPMU_mode_set:
> case XENPMU_feature_set:
> + case XENPMU_init:
> + case XENPMU_finish:
> if ( copy_from_guest(&pmu_params, arg, 1) )
> return -EFAULT;
>
> @@ -444,6 +541,14 @@ long do_xenpmu_op(unsigned int op,
> XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
>
> break;
>
> + case XENPMU_init:
> + ret = pvpmu_init(current->domain, &pmu_params);
> + break;
> +
> + case XENPMU_finish:
> + pvpmu_finish(current->domain, &pmu_params);
> + break;
> +
> default:
> ret = -EINVAL;
> }
> diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
> index fae242d..310f590 100644
> --- a/xen/common/event_channel.c
> +++ b/xen/common/event_channel.c
> @@ -106,6 +106,7 @@ static int virq_is_global(uint32_t virq)
> case VIRQ_TIMER:
> case VIRQ_DEBUG:
> case VIRQ_XENOPROF:
> + case VIRQ_XENPMU:
> rc = 0;
> break;
> case VIRQ_ARCH_0 ... VIRQ_ARCH_7:
> diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
> index 96f7666..642a4b7 100644
> --- a/xen/include/asm-x86/hvm/vpmu.h
> +++ b/xen/include/asm-x86/hvm/vpmu.h
> @@ -64,6 +64,8 @@ struct vpmu_struct {
> void *context; /* May be shared with PV guest */
> void *priv_context; /* hypervisor-only */
> struct arch_vpmu_ops *arch_vpmu_ops;
> + struct xen_pmu_data *xenpmu_data;
> + spinlock_t vpmu_lock;
> };
>
> /* VPMU states */
> diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
> index 50cc048..e6307b5 100644
> --- a/xen/include/public/pmu.h
> +++ b/xen/include/public/pmu.h
> @@ -25,6 +25,8 @@
> #define XENPMU_mode_set 1
> #define XENPMU_feature_get 2
> #define XENPMU_feature_set 3
> +#define XENPMU_init 4
> +#define XENPMU_finish 5
> /* ` } */
>
> /* Parameters structure for HYPERVISOR_xenpmu_op call */
> diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
> index 59fb544..1b98d8b 100644
> --- a/xen/include/public/xen.h
> +++ b/xen/include/public/xen.h
> @@ -161,6 +161,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
> #define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */
> #define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */
> #define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */
> +#define VIRQ_XENPMU 13 /* V. PMC interrupt */
>
> /* Architecture-specific VIRQ definitions. */
> #define VIRQ_ARCH_0 16
> diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
> index a496116..6456f72 100644
> --- a/xen/include/xsm/dummy.h
> +++ b/xen/include/xsm/dummy.h
> @@ -704,6 +704,9 @@ static XSM_INLINE int xsm_pmu_op (XSM_DEFAULT_ARG struct domain *d, int op)
> case XENPMU_feature_set:
> case XENPMU_feature_get:
> return xsm_default_action(XSM_PRIV, d, current->domain);
> + case XENPMU_init:
> + case XENPMU_finish:
> + return xsm_default_action(XSM_HOOK, d, current->domain);
> default:
> return -EPERM;
> }
> diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
> index ac98966..aefcbda 100644
> --- a/xen/xsm/flask/hooks.c
> +++ b/xen/xsm/flask/hooks.c
> @@ -1592,6 +1592,10 @@ static int flask_pmu_op (struct domain *d, unsigned int op)
> case XENPMU_feature_get:
> return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_XEN2,
> XEN2__PMU_CTRL, NULL);
> + case XENPMU_init:
> + case XENPMU_finish:
> + return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_XEN2,
> + XEN2__PMU_USE, NULL);
> default:
> return -EPERM;
> }
> diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
> index 3a5d798..a6517a3 100644
> --- a/xen/xsm/flask/policy/access_vectors
> +++ b/xen/xsm/flask/policy/access_vectors
> @@ -89,6 +89,8 @@ class xen2
> get_symbol
> # PMU control
> pmu_ctrl
> +# PMU use (domains, including unprivileged ones, will be using this operation)
> + pmu_use
> }
>
> # Classes domain and domain2 consist of operations that a domain performs on
> --
> 1.8.1.4
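For reference, the guest-visible initialization path acked above reduces to
the following (a condensed sketch of the quoted pvpmu_init() hunk, with the
error paths trimmed):

    /* pvpmu_init(): map the guest-provided page and bring up the VPMU. */
    page = get_page_from_gfn(d, params->val /* gfn */, NULL, P2M_ALLOC);
    get_page_type(page, PGT_writable_page);

    spin_lock(&vpmu->vpmu_lock);
    v->arch.vpmu.xenpmu_data = __map_domain_page_global(page);
    vpmu_initialise(v);
    spin_unlock(&vpmu->vpmu_lock);

XENPMU_finish undoes this: it destroys the VPMU, unmaps the page, and drops
the page reference.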
* [PATCH v24 07/15] x86/VPMU: Save VPMU state for PV guests during context switch
2015-06-10 15:04 [PATCH v24 00/15] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
` (5 preceding siblings ...)
2015-06-10 15:04 ` [PATCH v24 06/15] x86/VPMU: Initialize PMU for PV(H) guests Boris Ostrovsky
@ 2015-06-10 15:04 ` Boris Ostrovsky
2015-06-10 15:04 ` [PATCH v24 08/15] x86/VPMU: When handling MSR accesses, leave fault injection to callers Boris Ostrovsky
` (7 subsequent siblings)
14 siblings, 0 replies; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-10 15:04 UTC (permalink / raw)
To: JBeulich, kevin.tian, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: boris.ostrovsky, tim, jun.nakajima, xen-devel
Save VPMU state during context switch for both HVM and PV(H) guests.
A subsequent patch ("x86/VPMU: NMI-based VPMU support") will make it possible
for vpmu_switch_to() to call vmx_vmcs_try_enter()->vcpu_pause() which needs
is_running to be correctly set/cleared. To prepare for that, call context_saved()
before vpmu_switch_to() is executed. (Note that while this change could have
been delayed until that later patch, the changes are harmless to existing code
and so we do it here)
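The resulting ordering in context_switch() looks roughly like this (a
simplified sketch of the code after this patch, not the verbatim function):

    if ( prev != next )
    {
        _update_runstate_area(prev);
        vpmu_switch_from(prev);     /* save PMU state for HVM and PV(H) */
    }
    ...
    context_saved(prev);            /* updates prev's is_running */
    if ( prev != next )
    {
        _update_runstate_area(next);
        /* Must be done with interrupts enabled; may later pause a vCPU,
         * hence it runs after context_saved(). */
        vpmu_switch_to(next);
    }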
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/domain.c | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 8e79aca..2b82f61 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1537,17 +1537,14 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
}
if ( prev != next )
- _update_runstate_area(prev);
-
- if ( is_hvm_vcpu(prev) )
{
- if (prev != next)
- vpmu_switch_from(prev);
-
- if ( !list_empty(&prev->arch.hvm_vcpu.tm_list) )
- pt_save_timer(prev);
+ _update_runstate_area(prev);
+ vpmu_switch_from(prev);
}
+ if ( is_hvm_vcpu(prev) && !list_empty(&prev->arch.hvm_vcpu.tm_list) )
+ pt_save_timer(prev);
+
local_irq_disable();
set_current(next);
@@ -1585,15 +1582,16 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
!is_hardware_domain(next->domain));
}
- if (is_hvm_vcpu(next) && (prev != next) )
- /* Must be done with interrupts enabled */
- vpmu_switch_to(next);
-
context_saved(prev);
if ( prev != next )
+ {
_update_runstate_area(next);
+ /* Must be done with interrupts enabled */
+ vpmu_switch_to(next);
+ }
+
/* Ensure that the vcpu has an up-to-date time base. */
update_vcpu_system_time(next);
--
1.8.1.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v24 08/15] x86/VPMU: When handling MSR accesses, leave fault injection to callers
2015-06-10 15:04 [PATCH v24 00/15] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
` (6 preceding siblings ...)
2015-06-10 15:04 ` [PATCH v24 07/15] x86/VPMU: Save VPMU state for PV guests during context switch Boris Ostrovsky
@ 2015-06-10 15:04 ` Boris Ostrovsky
2015-06-15 15:06 ` Jan Beulich
2015-06-10 15:04 ` [PATCH v24 09/15] x86/VPMU: Add support for PMU register handling on PV guests Boris Ostrovsky
` (6 subsequent siblings)
14 siblings, 1 reply; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-10 15:04 UTC (permalink / raw)
To: JBeulich, kevin.tian, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: boris.ostrovsky, tim, jun.nakajima, xen-devel
With this patch vpmu_do_msr() will return an error code to indicate whether an
error was encountered during MSR processing (instead of stating that the access
was to a VPMU register).
As part of this patch we also check the validity of certain MSR accesses right
when we determine which register is being written, as opposed to postponing this
until later.
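Concretely, callers now inject the fault themselves when the VPMU code
reports an error, as in the svm.c hunk below:

    case MSR_AMD_FAM15H_EVNTSEL5:
        if ( vpmu_do_wrmsr(msr, msr_content, 0) )
            goto gpf;       /* the caller, not the VPMU code, raises #GP */
        break;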
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
Tested-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
---
Changes in v24:
* Return -EINVAL instead of 1 on error (and adjust commit message appropriately)
xen/arch/x86/hvm/svm/svm.c | 6 ++-
xen/arch/x86/hvm/svm/vpmu.c | 6 +--
xen/arch/x86/hvm/vmx/vmx.c | 24 ++++++++---
xen/arch/x86/hvm/vmx/vpmu_core2.c | 86 +++++++++++++++------------------------
4 files changed, 57 insertions(+), 65 deletions(-)
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 49d2ddf..08a44f2 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1708,7 +1708,8 @@ static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
case MSR_AMD_FAM15H_EVNTSEL3:
case MSR_AMD_FAM15H_EVNTSEL4:
case MSR_AMD_FAM15H_EVNTSEL5:
- vpmu_do_rdmsr(msr, msr_content);
+ if ( vpmu_do_rdmsr(msr, msr_content) )
+ goto gpf;
break;
case MSR_AMD64_DR0_ADDRESS_MASK:
@@ -1859,7 +1860,8 @@ static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
case MSR_AMD_FAM15H_EVNTSEL3:
case MSR_AMD_FAM15H_EVNTSEL4:
case MSR_AMD_FAM15H_EVNTSEL5:
- vpmu_do_wrmsr(msr, msr_content, 0);
+ if ( vpmu_do_wrmsr(msr, msr_content, 0) )
+ goto gpf;
break;
case MSR_IA32_MCx_MISC(4): /* Threshold register */
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index a8572a6..74d03a5 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -305,7 +305,7 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
is_pmu_enabled(msr_content) && !vpmu_is_set(vpmu, VPMU_RUNNING) )
{
if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
- return 1;
+ return 0;
vpmu_set(vpmu, VPMU_RUNNING);
if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
@@ -335,7 +335,7 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
/* Write to hw counters */
wrmsrl(msr, msr_content);
- return 1;
+ return 0;
}
static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
@@ -353,7 +353,7 @@ static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
rdmsrl(msr, *msr_content);
- return 1;
+ return 0;
}
static void amd_vpmu_destroy(struct vcpu *v)
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 62d99e1..89a14f7 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2166,12 +2166,17 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
*msr_content |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
/* Perhaps vpmu will change some bits. */
+ /* FALLTHROUGH */
+ case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7):
+ case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(3):
+ case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2:
+ case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ case MSR_IA32_PEBS_ENABLE:
+ case MSR_IA32_DS_AREA:
if ( vpmu_do_rdmsr(msr, msr_content) )
- goto done;
+ goto gp_fault;
break;
default:
- if ( vpmu_do_rdmsr(msr, msr_content) )
- break;
if ( passive_domain_do_rdmsr(msr, msr_content) )
goto done;
switch ( long_mode_do_msr_read(msr, msr_content) )
@@ -2347,7 +2352,7 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
if ( msr_content & ~supported )
{
/* Perhaps some other bits are supported in vpmu. */
- if ( !vpmu_do_wrmsr(msr, msr_content, supported) )
+ if ( vpmu_do_wrmsr(msr, msr_content, supported) )
break;
}
if ( msr_content & IA32_DEBUGCTLMSR_LBR )
@@ -2375,9 +2380,16 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
if ( !nvmx_msr_write_intercept(msr, msr_content) )
goto gp_fault;
break;
+ case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7):
+ case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(7):
+ case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2:
+ case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ case MSR_IA32_PEBS_ENABLE:
+ case MSR_IA32_DS_AREA:
+ if ( vpmu_do_wrmsr(msr, msr_content, 0) )
+ goto gp_fault;
+ break;
default:
- if ( vpmu_do_wrmsr(msr, msr_content, 0) )
- return X86EMUL_OKAY;
if ( passive_domain_do_wrmsr(msr, msr_content) )
return X86EMUL_OKAY;
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index e7642e5..9710149 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -454,36 +454,41 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
IA32_DEBUGCTLMSR_BTS_OFF_USR;
if ( !(msr_content & ~supported) &&
vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
- return 1;
+ return 0;
if ( (msr_content & supported) &&
!vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
printk(XENLOG_G_WARNING
"%pv: Debug Store unsupported on this CPU\n",
current);
}
- return 0;
+ return -EINVAL;
}
ASSERT(!supported);
+ if ( type == MSR_TYPE_COUNTER &&
+ (msr_content &
+ ~((1ull << core2_get_bitwidth_fix_count()) - 1)) )
+ /* Writing unsupported bits to a fixed counter */
+ return -EINVAL;
+
core2_vpmu_cxt = vpmu->context;
enabled_cntrs = vpmu->priv_context;
switch ( msr )
{
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
core2_vpmu_cxt->global_status &= ~msr_content;
- return 1;
+ return 0;
case MSR_CORE_PERF_GLOBAL_STATUS:
gdprintk(XENLOG_INFO, "Can not write readonly MSR: "
"MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
- hvm_inject_hw_exception(TRAP_gp_fault, 0);
- return 1;
+ return -EINVAL;
case MSR_IA32_PEBS_ENABLE:
if ( msr_content & 1 )
gdprintk(XENLOG_WARNING, "Guest is trying to enable PEBS, "
"which is not supported.\n");
core2_vpmu_cxt->pebs_enable = msr_content;
- return 1;
+ return 0;
case MSR_IA32_DS_AREA:
if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) )
{
@@ -492,18 +497,21 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
gdprintk(XENLOG_WARNING,
"Illegal address for IA32_DS_AREA: %#" PRIx64 "x\n",
msr_content);
- hvm_inject_hw_exception(TRAP_gp_fault, 0);
- return 1;
+ return -EINVAL;
}
core2_vpmu_cxt->ds_area = msr_content;
break;
}
gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
- return 1;
+ return 0;
case MSR_CORE_PERF_GLOBAL_CTRL:
global_ctrl = msr_content;
break;
case MSR_CORE_PERF_FIXED_CTR_CTRL:
+ if ( msr_content &
+ ( ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1)) )
+ return -EINVAL;
+
vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
*enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32);
if ( msr_content != 0 )
@@ -526,6 +534,9 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+ if ( msr_content & (~((1ull << 32) - 1)) )
+ return -EINVAL;
+
vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
if ( msr_content & (1ULL << 22) )
@@ -537,45 +548,17 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
}
}
+ if ( type != MSR_TYPE_GLOBAL )
+ wrmsrl(msr, msr_content);
+ else
+ vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+
if ( (global_ctrl & *enabled_cntrs) || (core2_vpmu_cxt->ds_area != 0) )
vpmu_set(vpmu, VPMU_RUNNING);
else
vpmu_reset(vpmu, VPMU_RUNNING);
- if ( type != MSR_TYPE_GLOBAL )
- {
- u64 mask;
- int inject_gp = 0;
- switch ( type )
- {
- case MSR_TYPE_ARCH_CTRL: /* MSR_P6_EVNTSEL[0,...] */
- mask = ~((1ull << 32) - 1);
- if (msr_content & mask)
- inject_gp = 1;
- break;
- case MSR_TYPE_CTRL: /* IA32_FIXED_CTR_CTRL */
- if ( msr == MSR_IA32_DS_AREA )
- break;
- /* 4 bits per counter, currently 3 fixed counters implemented. */
- mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);
- if (msr_content & mask)
- inject_gp = 1;
- break;
- case MSR_TYPE_COUNTER: /* IA32_FIXED_CTR[0-2] */
- mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1);
- if (msr_content & mask)
- inject_gp = 1;
- break;
- }
- if (inject_gp)
- hvm_inject_hw_exception(TRAP_gp_fault, 0);
- else
- wrmsrl(msr, msr_content);
- }
- else
- vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
-
- return 1;
+ return 0;
}
static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
@@ -603,19 +586,14 @@ static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
rdmsrl(msr, *msr_content);
}
}
- else
+ else if ( msr == MSR_IA32_MISC_ENABLE )
{
/* Extension for BTS */
- if ( msr == MSR_IA32_MISC_ENABLE )
- {
- if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
- *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
- }
- else
- return 0;
+ if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
+ *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
}
- return 1;
+ return 0;
}
static void core2_vpmu_do_cpuid(unsigned int input,
@@ -760,9 +738,9 @@ static int core2_no_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
{
int type = -1, index = -1;
if ( !is_core2_vpmu_msr(msr, &type, &index) )
- return 0;
+ return -EINVAL;
*msr_content = 0;
- return 1;
+ return 0;
}
/*
--
1.8.1.4
* Re: [PATCH v24 08/15] x86/VPMU: When handling MSR accesses, leave fault injection to callers
2015-06-10 15:04 ` [PATCH v24 08/15] x86/VPMU: When handling MSR accesses, leave fault injection to callers Boris Ostrovsky
@ 2015-06-15 15:06 ` Jan Beulich
2015-06-15 16:23 ` Boris Ostrovsky
0 siblings, 1 reply; 36+ messages in thread
From: Jan Beulich @ 2015-06-15 15:06 UTC (permalink / raw)
To: Boris Ostrovsky
Cc: kevin.tian, suravee.suthikulpanit, andrew.cooper3, tim,
dietmar.hahn, xen-devel, Aravind.Gopalakrishnan, jun.nakajima,
dgdegra
>>> On 10.06.15 at 17:04, <boris.ostrovsky@oracle.com> wrote:
> --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
> +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> @@ -454,36 +454,41 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
> IA32_DEBUGCTLMSR_BTS_OFF_USR;
> if ( !(msr_content & ~supported) &&
> vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
> - return 1;
> + return 0;
> if ( (msr_content & supported) &&
> !vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
> printk(XENLOG_G_WARNING
> "%pv: Debug Store unsupported on this CPU\n",
> current);
> }
> - return 0;
> + return -EINVAL;
> }
>
> ASSERT(!supported);
>
> + if ( type == MSR_TYPE_COUNTER &&
> + (msr_content &
> + ~((1ull << core2_get_bitwidth_fix_count()) - 1)) )
> + /* Writing unsupported bits to a fixed counter */
> + return -EINVAL;
> +
> core2_vpmu_cxt = vpmu->context;
> enabled_cntrs = vpmu->priv_context;
> switch ( msr )
> {
> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
> core2_vpmu_cxt->global_status &= ~msr_content;
> - return 1;
> + return 0;
> case MSR_CORE_PERF_GLOBAL_STATUS:
> gdprintk(XENLOG_INFO, "Can not write readonly MSR: "
> "MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
> - hvm_inject_hw_exception(TRAP_gp_fault, 0);
> - return 1;
> + return -EINVAL;
Is it intentional that you convert a success to a failure here? If so,
this should be mentioned (with reason) in the commit message. If
not, this should be adjusted so that there's no behavioral change here.
Jan
* Re: [PATCH v24 08/15] x86/VPMU: When handling MSR accesses, leave fault injection to callers
2015-06-15 15:06 ` Jan Beulich
@ 2015-06-15 16:23 ` Boris Ostrovsky
0 siblings, 0 replies; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-15 16:23 UTC (permalink / raw)
To: Jan Beulich
Cc: kevin.tian, suravee.suthikulpanit, andrew.cooper3, tim,
dietmar.hahn, xen-devel, Aravind.Gopalakrishnan, jun.nakajima,
dgdegra
On 06/15/2015 11:06 AM, Jan Beulich wrote:
>>>> On 10.06.15 at 17:04, <boris.ostrovsky@oracle.com> wrote:
>> --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
>> +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
>> @@ -454,36 +454,41 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
>> IA32_DEBUGCTLMSR_BTS_OFF_USR;
>> if ( !(msr_content & ~supported) &&
>> vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
>> - return 1;
>> + return 0;
>> if ( (msr_content & supported) &&
>> !vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
>> printk(XENLOG_G_WARNING
>> "%pv: Debug Store unsupported on this CPU\n",
>> current);
>> }
>> - return 0;
>> + return -EINVAL;
>> }
>>
>> ASSERT(!supported);
>>
>> + if ( type == MSR_TYPE_COUNTER &&
>> + (msr_content &
>> + ~((1ull << core2_get_bitwidth_fix_count()) - 1)) )
>> + /* Writing unsupported bits to a fixed counter */
>> + return -EINVAL;
>> +
>> core2_vpmu_cxt = vpmu->context;
>> enabled_cntrs = vpmu->priv_context;
>> switch ( msr )
>> {
>> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
>> core2_vpmu_cxt->global_status &= ~msr_content;
>> - return 1;
>> + return 0;
>> case MSR_CORE_PERF_GLOBAL_STATUS:
>> gdprintk(XENLOG_INFO, "Can not write readonly MSR: "
>> "MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
>> - hvm_inject_hw_exception(TRAP_gp_fault, 0);
>> - return 1;
>> + return -EINVAL;
> Is it intentional that you convert a success to a failure here? If so,
> this should be mentioned (with reason) in the commit message. If
> not, this should be adjusted to there's no behavioral change here.
Yes, this is intentional. Until now the return value indicated whether
the access was to a PMU register. This worked for HVM guests since they
can do hvm_inject_trap() at any time. For PV guests we are called from
emulate_privileged_op() and we need to know whether the access was
successful or not. This way emulate_privileged_op() itself takes care of
fault injection (by returning 0).
-boris
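In short, for the MSR_CORE_PERF_GLOBAL_STATUS hunk quoted above the change
is from handler-side to caller-side fault injection:

    /* Before: the handler injected the fault and reported "handled": */
    hvm_inject_hw_exception(TRAP_gp_fault, 0);
    return 1;

    /* After: the handler only reports failure; the HVM intercepts and
     * emulate_privileged_op() decide how the fault gets injected: */
    return -EINVAL;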
* [PATCH v24 09/15] x86/VPMU: Add support for PMU register handling on PV guests
2015-06-10 15:04 [PATCH v24 00/15] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
` (7 preceding siblings ...)
2015-06-10 15:04 ` [PATCH v24 08/15] x86/VPMU: When handling MSR accesses, leave fault injection to callers Boris Ostrovsky
@ 2015-06-10 15:04 ` Boris Ostrovsky
2015-06-10 15:04 ` [PATCH v24 10/15] x86/VPMU: Use pre-computed masks when checking validity of MSRs Boris Ostrovsky
` (5 subsequent siblings)
14 siblings, 0 replies; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-10 15:04 UTC (permalink / raw)
To: JBeulich, kevin.tian, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: boris.ostrovsky, tim, jun.nakajima, xen-devel
Intercept accesses to PMU MSRs and process them in the VPMU module. If the
vpmu ops for the VCPU are not initialized (which is the case, for example,
for PV guests that are not "VPMU-enlightened"), accesses to these MSRs will
return failure.
Dump VPMU state for all domains (HVM and PV) when requested.
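The PV read path added to emulate_privileged_op() then looks like this
(condensed from the traps.c hunk below):

    case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
        if ( vpmu_do_rdmsr(regs->ecx, &val) )
            goto fail;          /* uninitialized vpmu ops fail the access */
        regs->eax = (uint32_t)val;
        regs->edx = (uint32_t)(val >> 32);
        break;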
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
Tested-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
---
xen/arch/x86/domain.c | 3 +--
xen/arch/x86/hvm/vmx/vpmu_core2.c | 49 +++++++++++++++++++++++++++++++------
xen/arch/x86/hvm/vpmu.c | 3 +++
xen/arch/x86/traps.c | 51 +++++++++++++++++++++++++++++++++++++--
4 files changed, 95 insertions(+), 11 deletions(-)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 2b82f61..0aa27ac 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2069,8 +2069,7 @@ void arch_dump_vcpu_info(struct vcpu *v)
{
paging_dump_vcpu_info(v);
- if ( is_hvm_vcpu(v) )
- vpmu_dump(v);
+ vpmu_dump(v);
}
void domain_cpuid(
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index 9710149..089154e 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -27,6 +27,7 @@
#include <asm/regs.h>
#include <asm/types.h>
#include <asm/apic.h>
+#include <asm/traps.h>
#include <asm/msr.h>
#include <asm/msr-index.h>
#include <asm/hvm/support.h>
@@ -299,12 +300,18 @@ static inline void __core2_vpmu_save(struct vcpu *v)
rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
for ( i = 0; i < arch_pmc_cnt; i++ )
rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter);
+
+ if ( !has_hvm_container_vcpu(v) )
+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
}
static int core2_vpmu_save(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ if ( !has_hvm_container_vcpu(v) )
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) )
return 0;
@@ -342,6 +349,13 @@ static inline void __core2_vpmu_load(struct vcpu *v)
wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl);
wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area);
wrmsrl(MSR_IA32_PEBS_ENABLE, core2_vpmu_cxt->pebs_enable);
+
+ if ( !has_hvm_container_vcpu(v) )
+ {
+ wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl);
+ core2_vpmu_cxt->global_ovf_ctrl = 0;
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
+ }
}
static void core2_vpmu_load(struct vcpu *v)
@@ -433,7 +447,6 @@ static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
uint64_t supported)
{
- u64 global_ctrl;
int i, tmp;
int type = -1, index = -1;
struct vcpu *v = current;
@@ -477,7 +490,12 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
switch ( msr )
{
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ if ( msr_content & ~(0xC000000000000000 |
+ (((1ULL << fixed_pmc_cnt) - 1) << 32) |
+ ((1ULL << arch_pmc_cnt) - 1)) )
+ return -EINVAL;
core2_vpmu_cxt->global_status &= ~msr_content;
+ wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
return 0;
case MSR_CORE_PERF_GLOBAL_STATUS:
gdprintk(XENLOG_INFO, "Can not write readonly MSR: "
@@ -505,14 +523,18 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
return 0;
case MSR_CORE_PERF_GLOBAL_CTRL:
- global_ctrl = msr_content;
+ core2_vpmu_cxt->global_ctrl = msr_content;
break;
case MSR_CORE_PERF_FIXED_CTR_CTRL:
if ( msr_content &
( ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1)) )
return -EINVAL;
- vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
+ if ( has_hvm_container_vcpu(v) )
+ vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
+ &core2_vpmu_cxt->global_ctrl);
+ else
+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
*enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32);
if ( msr_content != 0 )
{
@@ -537,7 +559,11 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
if ( msr_content & (~((1ull << 32) - 1)) )
return -EINVAL;
- vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
+ if ( has_hvm_container_vcpu(v) )
+ vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
+ &core2_vpmu_cxt->global_ctrl);
+ else
+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
if ( msr_content & (1ULL << 22) )
*enabled_cntrs |= 1ULL << tmp;
@@ -551,9 +577,15 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
if ( type != MSR_TYPE_GLOBAL )
wrmsrl(msr, msr_content);
else
- vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+ {
+ if ( has_hvm_container_vcpu(v) )
+ vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+ else
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+ }
- if ( (global_ctrl & *enabled_cntrs) || (core2_vpmu_cxt->ds_area != 0) )
+ if ( (core2_vpmu_cxt->global_ctrl & *enabled_cntrs) ||
+ (core2_vpmu_cxt->ds_area != 0) )
vpmu_set(vpmu, VPMU_RUNNING);
else
vpmu_reset(vpmu, VPMU_RUNNING);
@@ -580,7 +612,10 @@ static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
*msr_content = core2_vpmu_cxt->global_status;
break;
case MSR_CORE_PERF_GLOBAL_CTRL:
- vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+ if ( has_hvm_container_vcpu(v) )
+ vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+ else
+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
break;
default:
rdmsrl(msr, *msr_content);
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index cfaa8c8..07fa368 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -119,6 +119,9 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
+ else
+ *msr_content = 0;
+
return 0;
}
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 080c0c1..0b26100 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -72,6 +72,7 @@
#include <asm/apic.h>
#include <asm/mc146818rtc.h>
#include <asm/hpet.h>
+#include <asm/hvm/vpmu.h>
#include <public/arch-x86/cpuid.h>
#include <xsm/xsm.h>
@@ -968,8 +969,10 @@ void pv_cpuid(struct cpu_user_regs *regs)
__clear_bit(X86_FEATURE_TOPOEXT % 32, &c);
break;
+ case 0x0000000a: /* Architectural Performance Monitor Features (Intel) */
+ break;
+
case 0x00000005: /* MONITOR/MWAIT */
- case 0x0000000a: /* Architectural Performance Monitor Features */
case 0x0000000b: /* Extended Topology Enumeration */
case 0x8000000a: /* SVM revision and features */
case 0x8000001b: /* Instruction Based Sampling */
@@ -985,6 +988,9 @@ void pv_cpuid(struct cpu_user_regs *regs)
}
out:
+ /* VPMU may decide to modify some of the leaves */
+ vpmu_do_cpuid(regs->eax, &a, &b, &c, &d);
+
regs->eax = a;
regs->ebx = b;
regs->ecx = c;
@@ -2008,6 +2014,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
char *io_emul_stub = NULL;
void (*io_emul)(struct cpu_user_regs *) __attribute__((__regparm__(1)));
uint64_t val;
+ bool_t vpmu_msr;
if ( !read_descriptor(regs->cs, v, regs,
&code_base, &code_limit, &ar,
@@ -2502,7 +2509,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
uint32_t eax = regs->eax;
uint32_t edx = regs->edx;
uint64_t msr_content = ((uint64_t)edx << 32) | eax;
-
+ vpmu_msr = 0;
switch ( regs->_ecx )
{
case MSR_FS_BASE:
@@ -2639,6 +2646,22 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
if ( v->arch.debugreg[7] & DR7_ACTIVE_MASK )
wrmsrl(regs->_ecx, msr_content);
break;
+ case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7):
+ case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(3):
+ case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2:
+ case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+ {
+ vpmu_msr = 1;
+ case MSR_AMD_FAM15H_EVNTSEL0...MSR_AMD_FAM15H_PERFCTR5:
+ if ( vpmu_msr || (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
+ {
+ if ( vpmu_do_wrmsr(regs->ecx, msr_content, 0) )
+ goto fail;
+ }
+ break;
+ }
+ /*FALLTHROUGH*/
default:
if ( wrmsr_hypervisor_regs(regs->ecx, msr_content) == 1 )
@@ -2674,6 +2697,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
break;
case 0x32: /* RDMSR */
+ vpmu_msr = 0;
switch ( regs->_ecx )
{
case MSR_FS_BASE:
@@ -2741,6 +2765,29 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
[regs->_ecx - MSR_AMD64_DR1_ADDRESS_MASK + 1];
regs->edx = 0;
break;
+ case MSR_IA32_PERF_CAPABILITIES:
+ /* No extra capabilities are supported */
+ regs->eax = regs->edx = 0;
+ break;
+ case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7):
+ case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(3):
+ case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2:
+ case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+ {
+ vpmu_msr = 1;
+ case MSR_AMD_FAM15H_EVNTSEL0...MSR_AMD_FAM15H_PERFCTR5:
+ if ( vpmu_msr || (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
+ {
+ if ( vpmu_do_rdmsr(regs->ecx, &val) )
+ goto fail;
+
+ regs->eax = (uint32_t)val;
+ regs->edx = (uint32_t)(val >> 32);
+ }
+ break;
+ }
+ /*FALLTHROUGH*/
default:
if ( rdmsr_hypervisor_regs(regs->ecx, &val) )
--
1.8.1.4
* [PATCH v24 10/15] x86/VPMU: Use pre-computed masks when checking validity of MSRs
2015-06-10 15:04 [PATCH v24 00/15] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
` (8 preceding siblings ...)
2015-06-10 15:04 ` [PATCH v24 09/15] x86/VPMU: Add support for PMU register handling on PV guests Boris Ostrovsky
@ 2015-06-10 15:04 ` Boris Ostrovsky
2015-06-11 8:22 ` Tian, Kevin
2015-06-10 15:04 ` [PATCH v24 11/15] VPMU/AMD: Check MSR values before writing to hardware Boris Ostrovsky
` (4 subsequent siblings)
14 siblings, 1 reply; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-10 15:04 UTC (permalink / raw)
To: JBeulich, kevin.tian, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: boris.ostrovsky, tim, jun.nakajima, xen-devel
No need to compute those masks on every MSR access.
Also, when checking MSR_P6_EVNTSELx registers make sure that bit 21
(which is a reserved bit) is not set.
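The masks are computed once in core2_vpmu_init(), and each MSR write then
reduces to a single test (condensed from the hunks below):

    /* Once, at initialization: */
    fixed_ctrl_mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);

    /* On every write to MSR_CORE_PERF_FIXED_CTR_CTRL: */
    if ( msr_content & fixed_ctrl_mask )
        return -EINVAL;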
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v24:
* Mentioned in the commit message that newly-checked bit 21 is reserved
xen/arch/x86/hvm/vmx/vpmu_core2.c | 28 ++++++++++++++++++----------
1 file changed, 18 insertions(+), 10 deletions(-)
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index 089154e..166277a 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -80,9 +80,16 @@ static bool_t __read_mostly full_width_write;
#define FIXED_CTR_CTRL_BITS 4
#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1)
+#define ARCH_CNTR_ENABLED (1ULL << 22)
+
/* Number of general-purpose and fixed performance counters */
static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
+/* Masks used for testing whether an MSR is valid */
+#define ARCH_CTRL_MASK (~((1ull << 32) - 1) | (1ull << 21))
+static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
+static uint64_t __read_mostly global_ovf_ctrl_mask;
+
/*
* QUIRK to workaround an issue on various family 6 cpus.
* The issue leads to endless PMC interrupt loops on the processor.
@@ -479,9 +486,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
ASSERT(!supported);
- if ( type == MSR_TYPE_COUNTER &&
- (msr_content &
- ~((1ull << core2_get_bitwidth_fix_count()) - 1)) )
+ if ( (type == MSR_TYPE_COUNTER) && (msr_content & fixed_counters_mask) )
/* Writing unsupported bits to a fixed counter */
return -EINVAL;
@@ -490,9 +495,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
switch ( msr )
{
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- if ( msr_content & ~(0xC000000000000000 |
- (((1ULL << fixed_pmc_cnt) - 1) << 32) |
- ((1ULL << arch_pmc_cnt) - 1)) )
+ if ( msr_content & global_ovf_ctrl_mask )
return -EINVAL;
core2_vpmu_cxt->global_status &= ~msr_content;
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
@@ -526,8 +529,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
core2_vpmu_cxt->global_ctrl = msr_content;
break;
case MSR_CORE_PERF_FIXED_CTR_CTRL:
- if ( msr_content &
- ( ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1)) )
+ if ( msr_content & fixed_ctrl_mask )
return -EINVAL;
if ( has_hvm_container_vcpu(v) )
@@ -556,7 +558,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
- if ( msr_content & (~((1ull << 32) - 1)) )
+ if ( msr_content & ARCH_CTRL_MASK )
return -EINVAL;
if ( has_hvm_container_vcpu(v) )
@@ -565,7 +567,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
else
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
- if ( msr_content & (1ULL << 22) )
+ if ( msr_content & ARCH_CNTR_ENABLED )
*enabled_cntrs |= 1ULL << tmp;
else
*enabled_cntrs &= ~(1ULL << tmp);
@@ -915,6 +917,12 @@ int __init core2_vpmu_init(void)
rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
full_width_write = (caps >> 13) & 1;
+ fixed_ctrl_mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);
+ fixed_counters_mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1);
+ global_ovf_ctrl_mask = ~(0xC000000000000000 |
+ (((1ULL << fixed_pmc_cnt) - 1) << 32) |
+ ((1ULL << arch_pmc_cnt) - 1));
+
check_pmc_quirk();
if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
--
1.8.1.4
* Re: [PATCH v24 10/15] x86/VPMU: Use pre-computed masks when checking validity of MSRs
2015-06-10 15:04 ` [PATCH v24 10/15] x86/VPMU: Use pre-computed masks when checking validity of MSRs Boris Ostrovsky
@ 2015-06-11 8:22 ` Tian, Kevin
0 siblings, 0 replies; 36+ messages in thread
From: Tian, Kevin @ 2015-06-11 8:22 UTC (permalink / raw)
To: Boris Ostrovsky, JBeulich, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: tim, Nakajima, Jun, xen-devel
> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
> Sent: Wednesday, June 10, 2015 11:04 PM
>
> No need to compute those masks on every MSR access.
>
> Also, when checking MSR_P6_EVNTSELx registers make sure that bit 21
> (which is a reserved bit) is not set.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
> ---
> Changes in v24:
> * Mentioned in the commit message that newly-checked bit 21 is reserved
>
> xen/arch/x86/hvm/vmx/vpmu_core2.c | 28 ++++++++++++++++++----------
> 1 file changed, 18 insertions(+), 10 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> index 089154e..166277a 100644
> --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
> +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> @@ -80,9 +80,16 @@ static bool_t __read_mostly full_width_write;
> #define FIXED_CTR_CTRL_BITS 4
> #define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1)
>
> +#define ARCH_CNTR_ENABLED (1ULL << 22)
> +
> /* Number of general-purpose and fixed performance counters */
> static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
>
> +/* Masks used for testing whether an MSR is valid */
> +#define ARCH_CTRL_MASK (~((1ull << 32) - 1) | (1ull << 21))
> +static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
> +static uint64_t __read_mostly global_ovf_ctrl_mask;
> +
> /*
> * QUIRK to workaround an issue on various family 6 cpus.
> * The issue leads to endless PMC interrupt loops on the processor.
> @@ -479,9 +486,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
>
> ASSERT(!supported);
>
> - if ( type == MSR_TYPE_COUNTER &&
> - (msr_content &
> - ~((1ull << core2_get_bitwidth_fix_count()) - 1)) )
> + if ( (type == MSR_TYPE_COUNTER) && (msr_content & fixed_counters_mask) )
> /* Writing unsupported bits to a fixed counter */
> return -EINVAL;
>
> @@ -490,9 +495,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
> switch ( msr )
> {
> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
> - if ( msr_content & ~(0xC000000000000000 |
> - (((1ULL << fixed_pmc_cnt) - 1) << 32) |
> - ((1ULL << arch_pmc_cnt) - 1)) )
> + if ( msr_content & global_ovf_ctrl_mask )
> return -EINVAL;
> core2_vpmu_cxt->global_status &= ~msr_content;
> wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
> @@ -526,8 +529,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
> core2_vpmu_cxt->global_ctrl = msr_content;
> break;
> case MSR_CORE_PERF_FIXED_CTR_CTRL:
> - if ( msr_content &
> - ( ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1)) )
> + if ( msr_content & fixed_ctrl_mask )
> return -EINVAL;
>
> if ( has_hvm_container_vcpu(v) )
> @@ -556,7 +558,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
> struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
> vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
>
> - if ( msr_content & (~((1ull << 32) - 1)) )
> + if ( msr_content & ARCH_CTRL_MASK )
> return -EINVAL;
>
> if ( has_hvm_container_vcpu(v) )
> @@ -565,7 +567,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
> else
> rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
>
> - if ( msr_content & (1ULL << 22) )
> + if ( msr_content & ARCH_CNTR_ENABLED )
> *enabled_cntrs |= 1ULL << tmp;
> else
> *enabled_cntrs &= ~(1ULL << tmp);
> @@ -915,6 +917,12 @@ int __init core2_vpmu_init(void)
> rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
> full_width_write = (caps >> 13) & 1;
>
> + fixed_ctrl_mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);
> + fixed_counters_mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1);
> + global_ovf_ctrl_mask = ~(0xC000000000000000 |
> + (((1ULL << fixed_pmc_cnt) - 1) << 32) |
> + ((1ULL << arch_pmc_cnt) - 1));
> +
> check_pmc_quirk();
>
> if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
> --
> 1.8.1.4
* [PATCH v24 11/15] VPMU/AMD: Check MSR values before writing to hardware
2015-06-10 15:04 [PATCH v24 00/15] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
` (9 preceding siblings ...)
2015-06-10 15:04 ` [PATCH v24 10/15] x86/VPMU: Use pre-computed masks when checking validity of MSRs Boris Ostrovsky
@ 2015-06-10 15:04 ` Boris Ostrovsky
2015-06-10 15:04 ` [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests Boris Ostrovsky
` (3 subsequent siblings)
14 siblings, 0 replies; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-10 15:04 UTC (permalink / raw)
To: JBeulich, kevin.tian, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: boris.ostrovsky, tim, jun.nakajima, xen-devel
A number of fields of PMU control MSRs are defined as Reserved. AMD
documentation requires that such fields be preserved when the register
is written by software.
Add checks to amd_vpmu_do_wrmsr() to make sure that guests don't attempt
to modify those bits.
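The reserved bits are sampled from hardware once at initialization, and a
guest write must then reproduce them exactly (condensed from the hunks
below):

    /* amd_vpmu_init(): remember the reserved-bit values per control MSR. */
    rdmsrl(ctrls[i], ctrl_rsvd[i]);
    ctrl_rsvd[i] &= CTRL_RSVD_MASK;

    /* amd_vpmu_do_wrmsr(): reject writes that would alter them. */
    if ( (type == MSR_TYPE_CTRL) &&
         ((msr_content & CTRL_RSVD_MASK) != ctrl_rsvd[idx]) )
        return -EINVAL;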
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v24:
* Initialize counters to reserved values instead of all zeroes.
* Move test for invalid wrmsr value to the top of amd_vpmu_do_wrmsr()
xen/arch/x86/hvm/svm/vpmu.c | 49 +++++++++++++++++++++++++++++++++++++++------
1 file changed, 43 insertions(+), 6 deletions(-)
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 74d03a5..934f1b7 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -48,6 +48,7 @@ static bool_t __read_mostly k7_counters_mirrored;
#define F10H_NUM_COUNTERS 4
#define F15H_NUM_COUNTERS 6
+#define MAX_NUM_COUNTERS F15H_NUM_COUNTERS
/* PMU Counter MSRs. */
static const u32 AMD_F10H_COUNTERS[] = {
@@ -83,6 +84,11 @@ static const u32 AMD_F15H_CTRLS[] = {
MSR_AMD_FAM15H_EVNTSEL5
};
+/* Bits [63:42], [39:36], 21 and 19 are reserved */
+#define CTRL_RSVD_MASK ((-1ULL & (~((1ULL << 42) - 1))) | \
+ (0xfULL << 36) | (1ULL << 21) | (1ULL << 19))
+static uint64_t __read_mostly ctrl_rsvd[MAX_NUM_COUNTERS];
+
/* Use private context as a flag for MSR bitmap */
#define msr_bitmap_on(vpmu) do { \
(vpmu)->priv_context = (void *)-1L; \
@@ -92,17 +98,24 @@ static const u32 AMD_F15H_CTRLS[] = {
} while (0)
#define is_msr_bitmap_on(vpmu) ((vpmu)->priv_context != NULL)
-static inline int get_pmu_reg_type(u32 addr)
+static inline int get_pmu_reg_type(u32 addr, unsigned int *idx)
{
if ( (addr >= MSR_K7_EVNTSEL0) && (addr <= MSR_K7_EVNTSEL3) )
+ {
+ *idx = addr - MSR_K7_EVNTSEL0;
return MSR_TYPE_CTRL;
+ }
if ( (addr >= MSR_K7_PERFCTR0) && (addr <= MSR_K7_PERFCTR3) )
+ {
+ *idx = addr - MSR_K7_PERFCTR0;
return MSR_TYPE_COUNTER;
+ }
if ( (addr >= MSR_AMD_FAM15H_EVNTSEL0) &&
(addr <= MSR_AMD_FAM15H_PERFCTR5 ) )
{
+ *idx = (addr - MSR_AMD_FAM15H_EVNTSEL0) >> 1;
if (addr & 1)
return MSR_TYPE_COUNTER;
else
@@ -140,6 +153,16 @@ static inline u32 get_fam15h_addr(u32 addr)
return addr;
}
+static void amd_vpmu_init_regs(struct xen_pmu_amd_ctxt *ctxt)
+{
+ unsigned i;
+ uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+
+ memset(&ctxt->regs[0], 0, 2 * sizeof(uint64_t) * num_counters);
+ for ( i = 0; i < num_counters; i++ )
+ ctrl_regs[i] = ctrl_rsvd[i];
+}
+
static void amd_vpmu_set_msr_bitmap(struct vcpu *v)
{
unsigned int i;
@@ -289,19 +312,24 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
{
struct vcpu *v = current;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ unsigned int idx = 0;
+ int type = get_pmu_reg_type(msr, &idx);
ASSERT(!supported);
+ if ( (type == MSR_TYPE_CTRL ) &&
+ ((msr_content & CTRL_RSVD_MASK) != ctrl_rsvd[idx]) )
+ return -EINVAL;
+
/* For all counters, enable guest only mode for HVM guest */
- if ( has_hvm_container_vcpu(v) &&
- (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) &&
+ if ( has_hvm_container_vcpu(v) && (type == MSR_TYPE_CTRL) &&
!is_guest_mode(msr_content) )
{
set_guest_mode(msr_content);
}
/* check if the first counter is enabled */
- if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) &&
+ if ( (type == MSR_TYPE_CTRL) &&
is_pmu_enabled(msr_content) && !vpmu_is_set(vpmu, VPMU_RUNNING) )
{
if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
@@ -313,7 +341,7 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
}
/* stop saving & restore if guest stops first counter */
- if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) &&
+ if ( (type == MSR_TYPE_CTRL) &&
(is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) )
{
vpmu_reset(vpmu, VPMU_RUNNING);
@@ -433,7 +461,7 @@ int svm_vpmu_initialise(struct vcpu *v)
if ( !counters )
return -EINVAL;
- ctxt = xzalloc_bytes(sizeof(*ctxt) +
+ ctxt = xmalloc_bytes(sizeof(*ctxt) +
2 * sizeof(uint64_t) * num_counters);
if ( !ctxt )
{
@@ -445,6 +473,7 @@ int svm_vpmu_initialise(struct vcpu *v)
ctxt->counters = sizeof(*ctxt);
ctxt->ctrls = ctxt->counters + sizeof(uint64_t) * num_counters;
+ amd_vpmu_init_regs(ctxt);
vpmu->context = ctxt;
vpmu->priv_context = NULL;
@@ -457,6 +486,8 @@ int svm_vpmu_initialise(struct vcpu *v)
int __init amd_vpmu_init(void)
{
+ unsigned int i;
+
switch ( current_cpu_data.x86 )
{
case 0x15:
@@ -490,6 +521,12 @@ int __init amd_vpmu_init(void)
return -ENOSPC;
}
+ for ( i = 0; i < num_counters; i++ )
+ {
+ rdmsrl(ctrls[i], ctrl_rsvd[i]);
+ ctrl_rsvd[i] &= CTRL_RSVD_MASK;
+ }
+
return 0;
}
--
1.8.1.4
* [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests
2015-06-10 15:04 [PATCH v24 00/15] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
` (10 preceding siblings ...)
2015-06-10 15:04 ` [PATCH v24 11/15] VPMU/AMD: Check MSR values before writing to hardware Boris Ostrovsky
@ 2015-06-10 15:04 ` Boris Ostrovsky
2015-06-11 8:38 ` Tian, Kevin
2015-06-15 15:50 ` Jan Beulich
2015-06-10 15:04 ` [PATCH v24 13/15] x86/VPMU: Merge vpmu_rdmsr and vpmu_wrmsr Boris Ostrovsky
` (2 subsequent siblings)
14 siblings, 2 replies; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-10 15:04 UTC (permalink / raw)
To: JBeulich, kevin.tian, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: boris.ostrovsky, tim, jun.nakajima, xen-devel
Add support for handling PMU interrupts for PV(H) guests.
VPMU for the interrupted VCPU is unloaded until the guest issues the
XENPMU_flush hypercall. This allows the guest to access PMU MSR values that
are stored in the VPMU context, which is shared between the hypervisor and
the domain, thus avoiding traps to the hypervisor.
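From the guest's side the sequence is roughly as follows (an illustrative
sketch only; the handler and hypercall-wrapper names are hypothetical, while
XENPMU_flush itself comes from this patch):

    /* Hypothetical PV(H) guest handler for VIRQ_XENPMU: */
    read_counters(&xenpmu_shared->pmu);   /* VPMU is unloaded, so the MSR
                                           * values come from the shared
                                           * context without trapping     */
    xenpmu_op(XENPMU_flush, NULL);        /* reload context, re-arm PMU   */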
Since the interrupt handler may now force a VPMU context save (i.e. set the
VPMU_CONTEXT_SAVE flag), we need to make changes to amd_vpmu_save(), which
until now expected this flag to be set only when the counters were stopped.
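The amd_vpmu_save() change boils down to (condensed from the hunk below):

    /* Stop the counters unconditionally... */
    for ( i = 0; i < num_counters; i++ )
        wrmsrl(ctrls[i], 0);

    /* ...then decide whether this is just a freeze or a full save. */
    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
    {
        vpmu_set(vpmu, VPMU_FROZEN);
        return 0;
    }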
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
Changes in v24:
* For both AMD and Intel, copy the guest's MSRs into the hypervisor's context
first and then verify that copy (to keep things read-once by the hypervisor);
see the sketch after this list
* To make sure that the guest did not alter the register offsets, don't copy
those values; store them into the shared area during VPMU initialization, and
clarify in the public header file that they are read-only to the guest
* Make vpmu_load return arch_vpmu_load()'s error code, not 1.
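The read-once pattern from the first bullet, condensed from the AMD load
hunk below:

    /* Copy the whole guest register block first... */
    memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);

    /* ...then validate only the private copy, so a racing guest cannot
     * change a value between the check and its use. */
    for ( i = 0; i < num_counters; i++ )
        if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
            return -EINVAL;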
xen/arch/x86/hvm/svm/vpmu.c | 90 ++++++++++---
xen/arch/x86/hvm/vmx/vpmu_core2.c | 108 ++++++++++++++-
xen/arch/x86/hvm/vpmu.c | 268 +++++++++++++++++++++++++++++++++-----
xen/include/asm-x86/hvm/vpmu.h | 10 +-
xen/include/public/arch-x86/pmu.h | 41 +++++-
xen/include/public/pmu.h | 2 +
xen/include/xsm/dummy.h | 4 +-
xen/xsm/flask/hooks.c | 2 +
8 files changed, 464 insertions(+), 61 deletions(-)
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 934f1b7..b93d31d 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -46,6 +46,9 @@ static const u32 __read_mostly *counters;
static const u32 __read_mostly *ctrls;
static bool_t __read_mostly k7_counters_mirrored;
+/* Total size of PMU registers block (copied to/from PV(H) guest) */
+static unsigned int __read_mostly regs_sz;
+
#define F10H_NUM_COUNTERS 4
#define F15H_NUM_COUNTERS 6
#define MAX_NUM_COUNTERS F15H_NUM_COUNTERS
@@ -158,7 +161,7 @@ static void amd_vpmu_init_regs(struct xen_pmu_amd_ctxt *ctxt)
unsigned i;
uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
- memset(&ctxt->regs[0], 0, 2 * sizeof(uint64_t) * num_counters);
+ memset(&ctxt->regs[0], 0, regs_sz);
for ( i = 0; i < num_counters; i++ )
ctrl_regs[i] = ctrl_rsvd[i];
}
@@ -211,27 +214,65 @@ static inline void context_load(struct vcpu *v)
}
}
-static void amd_vpmu_load(struct vcpu *v)
+static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
- uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+ struct xen_pmu_amd_ctxt *ctxt;
+ uint64_t *ctrl_regs;
+ unsigned int i;
vpmu_reset(vpmu, VPMU_FROZEN);
- if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+ if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
{
- unsigned int i;
+ ctxt = vpmu->context;
+ ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
for ( i = 0; i < num_counters; i++ )
wrmsrl(ctrls[i], ctrl_regs[i]);
- return;
+ return 0;
+ }
+
+ if ( from_guest )
+ {
+ unsigned int num_enabled = 0;
+ struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
+
+ ASSERT(!is_hvm_vcpu(v));
+
+ ctxt = vpmu->context;
+ ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+
+ memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);
+
+ for ( i = 0; i < num_counters; i++ )
+ {
+ if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
+ {
+ /*
+ * Not necessary to re-init context since we should never load
+ * it until guest provides valid values. But just to be safe.
+ */
+ amd_vpmu_init_regs(ctxt);
+ return -EINVAL;
+ }
+
+ if ( is_pmu_enabled(ctrl_regs[i]) )
+ num_enabled++;
+ }
+
+ if ( num_enabled )
+ vpmu_set(vpmu, VPMU_RUNNING);
+ else
+ vpmu_reset(vpmu, VPMU_RUNNING);
}
vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
context_load(v);
+
+ return 0;
}
static inline void context_save(struct vcpu *v)
@@ -246,22 +287,17 @@ static inline void context_save(struct vcpu *v)
rdmsrl(counters[i], counter_regs[i]);
}
-static int amd_vpmu_save(struct vcpu *v)
+static int amd_vpmu_save(struct vcpu *v, bool_t to_guest)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
unsigned int i;
- /*
- * Stop the counters. If we came here via vpmu_save_force (i.e.
- * when VPMU_CONTEXT_SAVE is set) counters are already stopped.
- */
+ for ( i = 0; i < num_counters; i++ )
+ wrmsrl(ctrls[i], 0);
+
if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
{
vpmu_set(vpmu, VPMU_FROZEN);
-
- for ( i = 0; i < num_counters; i++ )
- wrmsrl(ctrls[i], 0);
-
return 0;
}
@@ -274,6 +310,16 @@ static int amd_vpmu_save(struct vcpu *v)
has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
amd_vpmu_unset_msr_bitmap(v);
+ if ( to_guest )
+ {
+ struct xen_pmu_amd_ctxt *guest_ctxt, *ctxt;
+
+ ASSERT(!is_hvm_vcpu(v));
+ ctxt = vpmu->context;
+ guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
+ memcpy(&guest_ctxt->regs[0], &ctxt->regs[0], regs_sz);
+ }
+
return 1;
}
@@ -461,8 +507,7 @@ int svm_vpmu_initialise(struct vcpu *v)
if ( !counters )
return -EINVAL;
- ctxt = xmalloc_bytes(sizeof(*ctxt) +
- 2 * sizeof(uint64_t) * num_counters);
+ ctxt = xmalloc_bytes(sizeof(*ctxt) + regs_sz);
if ( !ctxt )
{
printk(XENLOG_G_WARNING "Insufficient memory for PMU, "
@@ -478,6 +523,13 @@ int svm_vpmu_initialise(struct vcpu *v)
vpmu->context = ctxt;
vpmu->priv_context = NULL;
+ if ( !is_hvm_vcpu(v) )
+ {
+ /* Copy register offsets to shared area */
+ ASSERT(vpmu->xenpmu_data);
+ memcpy(&vpmu->xenpmu_data->pmu.c.amd, ctxt, sizeof(*ctxt));
+ }
+
vpmu->arch_vpmu_ops = &amd_vpmu_ops;
vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
@@ -527,6 +579,8 @@ int __init amd_vpmu_init(void)
ctrl_rsvd[i] &= CTRL_RSVD_MASK;
}
+ regs_sz = 2 * sizeof(uint64_t) * num_counters;
+
return 0;
}
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index 166277a..1206e90 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -90,6 +90,13 @@ static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
static uint64_t __read_mostly global_ovf_ctrl_mask;
+/* Total size of PMU registers block (copied to/from PV(H) guest) */
+static unsigned int __read_mostly regs_sz;
+/* Offset into context of the beginning of PMU register block */
+static const unsigned int regs_off =
+ sizeof(((struct xen_pmu_intel_ctxt *)0)->fixed_counters) +
+ sizeof(((struct xen_pmu_intel_ctxt *)0)->arch_counters);
+
/*
* QUIRK to workaround an issue on various family 6 cpus.
* The issue leads to endless PMC interrupt loops on the processor.
@@ -312,7 +319,7 @@ static inline void __core2_vpmu_save(struct vcpu *v)
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
}
-static int core2_vpmu_save(struct vcpu *v)
+static int core2_vpmu_save(struct vcpu *v, bool_t to_guest)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
@@ -329,6 +336,13 @@ static int core2_vpmu_save(struct vcpu *v)
has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
+ if ( to_guest )
+ {
+ ASSERT(!is_hvm_vcpu(v));
+ memcpy((void *)(&vpmu->xenpmu_data->pmu.c.intel) + regs_off,
+ vpmu->context + regs_off, regs_sz);
+ }
+
return 1;
}
@@ -365,16 +379,93 @@ static inline void __core2_vpmu_load(struct vcpu *v)
}
}
-static void core2_vpmu_load(struct vcpu *v)
+static int core2_vpmu_verify(struct vcpu *v)
+{
+ unsigned int i;
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+ uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
+ struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
+ vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+ uint64_t fixed_ctrl;
+ uint64_t *priv_context = vpmu->priv_context;
+ uint64_t enabled_cntrs = 0;
+
+ if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask )
+ return -EINVAL;
+
+ fixed_ctrl = core2_vpmu_cxt->fixed_ctrl;
+ if ( fixed_ctrl & fixed_ctrl_mask )
+ return -EINVAL;
+
+ for ( i = 0; i < fixed_pmc_cnt; i++ )
+ {
+ if ( fixed_counters[i] & fixed_counters_mask )
+ return -EINVAL;
+ if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 )
+ enabled_cntrs |= (1ULL << i);
+ }
+ enabled_cntrs <<= 32;
+
+ for ( i = 0; i < arch_pmc_cnt; i++ )
+ {
+ uint64_t control = xen_pmu_cntr_pair[i].control;
+
+ if ( control & ARCH_CTRL_MASK )
+ return -EINVAL;
+ if ( control & ARCH_CNTR_ENABLED )
+ enabled_cntrs |= (1ULL << i);
+ }
+
+ if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) &&
+ !is_canonical_address(core2_vpmu_cxt->ds_area) )
+ return -EINVAL;
+
+ if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) ||
+ (core2_vpmu_cxt->ds_area != 0) )
+ vpmu_set(vpmu, VPMU_RUNNING);
+ else
+ vpmu_reset(vpmu, VPMU_RUNNING);
+
+ *priv_context = enabled_cntrs;
+
+ return 0;
+}
+
+static int core2_vpmu_load(struct vcpu *v, bool_t from_guest)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
- return;
+ return 0;
+
+ if ( from_guest )
+ {
+ int ret;
+
+ ASSERT(!is_hvm_vcpu(v));
+
+ memcpy(vpmu->context + regs_off,
+ (void *)&v->arch.vpmu.xenpmu_data->pmu.c.intel + regs_off,
+ regs_sz);
+
+ ret = core2_vpmu_verify(v);
+ if ( ret )
+ {
+ /*
+ * Not necessary since we should never load the context until
+ * guest provides valid values. But just to be safe.
+ */
+ memset(vpmu->context + regs_off, 0, regs_sz);
+ return ret;
+ }
+ }
vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
__core2_vpmu_load(v);
+
+ return 0;
}
static int core2_vpmu_alloc_resource(struct vcpu *v)
@@ -412,6 +503,13 @@ static int core2_vpmu_alloc_resource(struct vcpu *v)
vpmu->context = core2_vpmu_cxt;
vpmu->priv_context = p;
+ if ( !is_hvm_vcpu(v) )
+ {
+ /* Copy fixed/arch register offsets to shared area */
+ ASSERT(vpmu->xenpmu_data);
+ memcpy(&vpmu->xenpmu_data->pmu.c.intel, core2_vpmu_cxt, regs_off);
+ }
+
vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
return 1;
@@ -923,6 +1021,10 @@ int __init core2_vpmu_init(void)
(((1ULL << fixed_pmc_cnt) - 1) << 32) |
((1ULL << arch_pmc_cnt) - 1));
+ regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) +
+ sizeof(uint64_t) * fixed_pmc_cnt +
+ sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;
+
check_pmc_quirk();
if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index 07fa368..37e541b 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -85,31 +85,56 @@ static void __init parse_vpmu_param(char *s)
void vpmu_lvtpc_update(uint32_t val)
{
struct vpmu_struct *vpmu;
+ struct vcpu *curr = current;
- if ( vpmu_mode == XENPMU_MODE_OFF )
+ if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
return;
- vpmu = vcpu_vpmu(current);
+ vpmu = vcpu_vpmu(curr);
vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
- apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
+
+ /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
+ if ( is_hvm_vcpu(curr) || !vpmu->xenpmu_data ||
+ !vpmu_is_set(vpmu, VPMU_CACHED) )
+ apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
}
int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported)
{
- struct vpmu_struct *vpmu = vcpu_vpmu(current);
+ struct vcpu *curr = current;
+ struct vpmu_struct *vpmu;
if ( vpmu_mode == XENPMU_MODE_OFF )
return 0;
+ vpmu = vcpu_vpmu(curr);
if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
- return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
+ {
+ int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
+
+ /*
+ * We may have received a PMU interrupt during WRMSR handling
+ * and since do_wrmsr may load VPMU context we should save
+ * (and unload) it again.
+ */
+ if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
+ vpmu_is_set(vpmu, VPMU_CACHED) )
+ {
+ vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+ vpmu->arch_vpmu_ops->arch_vpmu_save(curr, 0);
+ vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+ }
+ return ret;
+ }
+
return 0;
}
int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
{
- struct vpmu_struct *vpmu = vcpu_vpmu(current);
+ struct vcpu *curr = current;
+ struct vpmu_struct *vpmu;
if ( vpmu_mode == XENPMU_MODE_OFF )
{
@@ -117,39 +142,184 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
return 0;
}
+ vpmu = vcpu_vpmu(curr);
if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
- return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
+ {
+ int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
+
+ if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
+ vpmu_is_set(vpmu, VPMU_CACHED) )
+ {
+ vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+ vpmu->arch_vpmu_ops->arch_vpmu_save(curr, 0);
+ vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+ }
+ return ret;
+ }
else
*msr_content = 0;
return 0;
}
+static inline struct vcpu *choose_hwdom_vcpu(void)
+{
+ unsigned idx;
+
+ if ( hardware_domain->max_vcpus == 0 )
+ return NULL;
+
+ idx = smp_processor_id() % hardware_domain->max_vcpus;
+
+ return hardware_domain->vcpu[idx];
+}
+
void vpmu_do_interrupt(struct cpu_user_regs *regs)
{
- struct vcpu *v = current;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ struct vcpu *sampled = current, *sampling;
+ struct vpmu_struct *vpmu;
+ struct vlapic *vlapic;
+ u32 vlapic_lvtpc;
- if ( vpmu->arch_vpmu_ops )
+ /* dom0 will handle interrupt for special domains (e.g. idle domain) */
+ if ( sampled->domain->domain_id >= DOMID_FIRST_RESERVED )
+ {
+ sampling = choose_hwdom_vcpu();
+ if ( !sampling )
+ return;
+ }
+ else
+ sampling = sampled;
+
+ vpmu = vcpu_vpmu(sampling);
+ if ( !vpmu->arch_vpmu_ops )
+ return;
+
+ /* PV(H) guest */
+ if ( !is_hvm_vcpu(sampling) )
{
- struct vlapic *vlapic = vcpu_vlapic(v);
- u32 vlapic_lvtpc;
+ const struct cpu_user_regs *cur_regs;
+ uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
+ domid_t domid = DOMID_SELF;
+
+ if ( !vpmu->xenpmu_data )
+ return;
+
+ if ( is_pvh_vcpu(sampling) &&
+ !vpmu->arch_vpmu_ops->do_interrupt(regs) )
+ return;
- if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
- !is_vlapic_lvtpc_enabled(vlapic) )
+ if ( vpmu_is_set(vpmu, VPMU_CACHED) )
return;
- vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
+ /* PV guest will be reading PMU MSRs from xenpmu_data */
+ vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+ vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1);
+ vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
- switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
+ if ( has_hvm_container_vcpu(sampled) )
+ *flags = 0;
+ else
+ *flags = PMU_SAMPLE_PV;
+
+ /* Store appropriate registers in xenpmu_data */
+ /* FIXME: 32-bit PVH should go here as well */
+ if ( is_pv_32bit_vcpu(sampling) )
{
- case APIC_MODE_FIXED:
- vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
- break;
- case APIC_MODE_NMI:
- v->nmi_pending = 1;
- break;
+ /*
+ * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
+ * and therefore we treat it the same way as a non-privileged
+ * PV 32-bit domain.
+ */
+ struct compat_pmu_regs *cmp;
+
+ cur_regs = guest_cpu_user_regs();
+
+ cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
+ cmp->ip = cur_regs->rip;
+ cmp->sp = cur_regs->rsp;
+ cmp->flags = cur_regs->eflags;
+ cmp->ss = cur_regs->ss;
+ cmp->cs = cur_regs->cs;
+ if ( (cmp->cs & 3) > 1 )
+ *flags |= PMU_SAMPLE_USER;
+ }
+ else
+ {
+ struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;
+
+ if ( (vpmu_mode & XENPMU_MODE_SELF) )
+ cur_regs = guest_cpu_user_regs();
+ else if ( !guest_mode(regs) && is_hardware_domain(sampling->domain) )
+ {
+ cur_regs = regs;
+ domid = DOMID_XEN;
+ }
+ else
+ cur_regs = guest_cpu_user_regs();
+
+ r->ip = cur_regs->rip;
+ r->sp = cur_regs->rsp;
+ r->flags = cur_regs->eflags;
+
+ if ( !has_hvm_container_vcpu(sampled) )
+ {
+ r->ss = cur_regs->ss;
+ r->cs = cur_regs->cs;
+ if ( !(sampled->arch.flags & TF_kernel_mode) )
+ *flags |= PMU_SAMPLE_USER;
+ }
+ else
+ {
+ struct segment_register seg;
+
+ hvm_get_segment_register(sampled, x86_seg_cs, &seg);
+ r->cs = seg.sel;
+ hvm_get_segment_register(sampled, x86_seg_ss, &seg);
+ r->ss = seg.sel;
+ r->cpl = seg.attr.fields.dpl;
+ if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
+ *flags |= PMU_SAMPLE_REAL;
+ }
}
+
+ vpmu->xenpmu_data->domain_id = domid;
+ vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
+ if ( is_hardware_domain(sampling->domain) )
+ vpmu->xenpmu_data->pcpu_id = smp_processor_id();
+ else
+ vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id;
+
+ vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
+ apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
+ *flags |= PMU_CACHED;
+ vpmu_set(vpmu, VPMU_CACHED);
+
+ send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
+
+ return;
+ }
+
+ /* HVM guests */
+ vlapic = vcpu_vlapic(sampling);
+
+ /* We don't support (yet) HVM dom0 */
+ ASSERT(sampling == sampled);
+
+ if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
+ !is_vlapic_lvtpc_enabled(vlapic) )
+ return;
+
+ vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
+
+ switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
+ {
+ case APIC_MODE_FIXED:
+ vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
+ break;
+ case APIC_MODE_NMI:
+ sampling->nmi_pending = 1;
+ break;
}
}
@@ -174,7 +344,7 @@ static void vpmu_save_force(void *arg)
vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
if ( vpmu->arch_vpmu_ops )
- (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v);
+ (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0);
vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
@@ -193,20 +363,20 @@ void vpmu_save(struct vcpu *v)
per_cpu(last_vcpu, pcpu) = v;
if ( vpmu->arch_vpmu_ops )
- if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) )
+ if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) )
vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
}
-void vpmu_load(struct vcpu *v)
+int vpmu_load(struct vcpu *v, bool_t from_guest)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
int pcpu = smp_processor_id();
struct vcpu *prev = NULL;
if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
- return;
+ return 0;
/* First time this VCPU is running here */
if ( vpmu->last_pcpu != pcpu )
@@ -245,15 +415,26 @@ void vpmu_load(struct vcpu *v)
local_irq_enable();
/* Only when PMU is counting, we load PMU context immediately. */
- if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
- return;
+ if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
+ (!is_hvm_vcpu(vpmu_vcpu(vpmu)) && vpmu_is_set(vpmu, VPMU_CACHED)) )
+ return 0;
if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
{
+ int ret;
+
apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
/* Arch code needs to set VPMU_CONTEXT_LOADED */
- vpmu->arch_vpmu_ops->arch_vpmu_load(v);
+ ret = vpmu->arch_vpmu_ops->arch_vpmu_load(v, from_guest);
+ if ( ret )
+ {
+ apic_write_around(APIC_LVTPC,
+ vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
+ return ret;
+ }
}
+
+ return 0;
}
void vpmu_initialise(struct vcpu *v)
@@ -265,6 +446,8 @@ void vpmu_initialise(struct vcpu *v)
BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
+ BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
+ BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);
ASSERT(!vpmu->flags && !vpmu->context);
@@ -449,7 +632,10 @@ void vpmu_dump(struct vcpu *v)
long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
{
int ret;
+ struct vcpu *curr;
struct xen_pmu_params pmu_params = {.val = 0};
+ struct xen_pmu_data *xenpmu_data;
+ struct vpmu_struct *vpmu;
if ( !opt_vpmu_enabled )
return -EOPNOTSUPP;
@@ -552,6 +738,30 @@ long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
pvpmu_finish(current->domain, &pmu_params);
break;
+ case XENPMU_lvtpc_set:
+ xenpmu_data = current->arch.vpmu.xenpmu_data;
+ if ( xenpmu_data == NULL )
+ return -EINVAL;
+ vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
+ break;
+
+ case XENPMU_flush:
+ curr = current;
+ vpmu = vcpu_vpmu(curr);
+ xenpmu_data = curr->arch.vpmu.xenpmu_data;
+ if ( xenpmu_data == NULL )
+ return -EINVAL;
+ xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED;
+ vpmu_reset(vpmu, VPMU_CACHED);
+ vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
+ if ( vpmu_load(curr, 1) )
+ {
+ xenpmu_data->pmu.pmu_flags |= PMU_CACHED;
+ vpmu_set(vpmu, VPMU_CACHED);
+ return -EIO;
+ }
+ break;
+
default:
ret = -EINVAL;
}
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index 642a4b7..f486d2f 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -47,8 +47,8 @@ struct arch_vpmu_ops {
unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
void (*arch_vpmu_destroy)(struct vcpu *v);
- int (*arch_vpmu_save)(struct vcpu *v);
- void (*arch_vpmu_load)(struct vcpu *v);
+ int (*arch_vpmu_save)(struct vcpu *v, bool_t to_guest);
+ int (*arch_vpmu_load)(struct vcpu *v, bool_t from_guest);
void (*arch_vpmu_dump)(const struct vcpu *);
};
@@ -75,6 +75,8 @@ struct vpmu_struct {
#define VPMU_CONTEXT_SAVE 0x8 /* Force context save */
#define VPMU_FROZEN 0x10 /* Stop counters while VCPU is not running */
#define VPMU_PASSIVE_DOMAIN_ALLOCATED 0x20
+/* PV(H) guests: VPMU registers are accessed by guest from shared page */
+#define VPMU_CACHED 0x40
static inline void vpmu_set(struct vpmu_struct *vpmu, const u32 mask)
{
@@ -107,7 +109,7 @@ void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
void vpmu_initialise(struct vcpu *v);
void vpmu_destroy(struct vcpu *v);
void vpmu_save(struct vcpu *v);
-void vpmu_load(struct vcpu *v);
+int vpmu_load(struct vcpu *v, bool_t from_guest);
void vpmu_dump(struct vcpu *v);
extern int acquire_pmu_ownership(int pmu_ownership);
@@ -126,7 +128,7 @@ static inline void vpmu_switch_from(struct vcpu *prev)
static inline void vpmu_switch_to(struct vcpu *next)
{
if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
- vpmu_load(next);
+ vpmu_load(next, 0);
}
#endif /* __ASM_X86_HVM_VPMU_H_*/
diff --git a/xen/include/public/arch-x86/pmu.h b/xen/include/public/arch-x86/pmu.h
index 4351115..1a53888 100644
--- a/xen/include/public/arch-x86/pmu.h
+++ b/xen/include/public/arch-x86/pmu.h
@@ -5,7 +5,10 @@
/* AMD PMU registers and structures */
struct xen_pmu_amd_ctxt {
- /* Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd) */
+ /*
+ * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd).
+ * For PV(H) guests these fields are RO.
+ */
uint32_t counters;
uint32_t ctrls;
@@ -30,7 +33,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_cntr_pair_t);
struct xen_pmu_intel_ctxt {
/*
* Offsets to fixed and architectural counter MSRs (relative to
- * xen_pmu_arch.c.intel)
+ * xen_pmu_arch.c.intel).
+ * For PV(H) guests these fields are RO.
*/
uint32_t fixed_counters;
uint32_t arch_counters;
@@ -69,6 +73,9 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_regs_t);
/* PMU flags */
#define PMU_CACHED (1<<0) /* PMU MSRs are cached in the context */
+#define PMU_SAMPLE_USER (1<<1) /* Sample is from user or kernel mode */
+#define PMU_SAMPLE_REAL (1<<2) /* Sample is from realmode */
+#define PMU_SAMPLE_PV (1<<3) /* Sample from a PV guest */
/*
* Architecture-specific information describing state of the processor at
@@ -93,12 +100,34 @@ struct xen_pmu_arch {
/* WO for hypervisor, RO for guest */
uint64_t pmu_flags;
- /* Placeholder for APIC LVTPC register */
- uint64_t lvtpc_pad;
+ /*
+ * APIC LVTPC register.
+ * RW for both hypervisor and guest.
+ * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware
+ * during XENPMU_flush or XENPMU_lvtpc_set.
+ */
+ union {
+ uint32_t lapic_lvtpc;
+ uint64_t pad;
+ } l;
+
+ /*
+ * Vendor-specific PMU registers.
+ * RW for both hypervisor and guest (see exceptions above).
+ * Guest's updates to this field are verified and then loaded by the
+ * hypervisor into hardware during XENPMU_flush
+ */
+ union {
+ struct xen_pmu_amd_ctxt amd;
+ struct xen_pmu_intel_ctxt intel;
- /* Placeholder for vendor-specific PMU registers */
+ /*
+ * Padding for contexts (fixed parts only, does not include MSR banks
+ * that are specified by offsets)
+ */
#define XENPMU_CTXT_PAD_SZ 128
- uint64_t pmu_regs_pad[XENPMU_CTXT_PAD_SZ / 8];
+ uint8_t pad[XENPMU_CTXT_PAD_SZ];
+ } c;
};
typedef struct xen_pmu_arch xen_pmu_arch_t;
DEFINE_XEN_GUEST_HANDLE(xen_pmu_arch_t);
diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
index e6307b5..7a45783 100644
--- a/xen/include/public/pmu.h
+++ b/xen/include/public/pmu.h
@@ -27,6 +27,8 @@
#define XENPMU_feature_set 3
#define XENPMU_init 4
#define XENPMU_finish 5
+#define XENPMU_lvtpc_set 6
+#define XENPMU_flush 7 /* Write cached MSR values to HW */
/* ` } */
/* Parameters structure for HYPERVISOR_xenpmu_op call */
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index 6456f72..37e6aa3 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -705,7 +705,9 @@ static XSM_INLINE int xsm_pmu_op (XSM_DEFAULT_ARG struct domain *d, int op)
case XENPMU_feature_get:
return xsm_default_action(XSM_PRIV, d, current->domain);
case XENPMU_init:
- case XENPMU_finish:
+ case XENPMU_finish:
+ case XENPMU_lvtpc_set:
+ case XENPMU_flush:
return xsm_default_action(XSM_HOOK, d, current->domain);
default:
return -EPERM;
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index aefcbda..4aa3e79 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1594,6 +1594,8 @@ static int flask_pmu_op (struct domain *d, unsigned int op)
XEN2__PMU_CTRL, NULL);
case XENPMU_init:
case XENPMU_finish:
+ case XENPMU_lvtpc_set:
+ case XENPMU_flush:
return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_XEN2,
XEN2__PMU_USE, NULL);
default:
--
1.8.1.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests
2015-06-10 15:04 ` [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests Boris Ostrovsky
@ 2015-06-11 8:38 ` Tian, Kevin
2015-06-11 9:33 ` Jan Beulich
2015-06-15 15:50 ` Jan Beulich
1 sibling, 1 reply; 36+ messages in thread
From: Tian, Kevin @ 2015-06-11 8:38 UTC (permalink / raw)
To: Boris Ostrovsky, JBeulich, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: tim, Nakajima, Jun, xen-devel
> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
> Sent: Wednesday, June 10, 2015 11:04 PM
>
> Add support for handling PMU interrupts for PV(H) guests.
>
> VPMU for the interrupted VCPU is unloaded until the guest issues the XENPMU_flush
> hypercall. This allows the guest to access PMU MSR values that are stored in the
> VPMU context, which is shared between the hypervisor and the domain, thus avoiding
> traps to the hypervisor.
>
> Since the interrupt handler may now force a VPMU context save (i.e. set the
> VPMU_CONTEXT_SAVE flag), we need to make changes to amd_vpmu_save(), which
> until now expected this flag to be set only when the counters were stopped.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> Acked-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
I may need more time to understand the whole interrupt stuff for PV(H)
guests. But regarding the VMX-specific changes, I think they are clear:
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests
2015-06-11 8:38 ` Tian, Kevin
@ 2015-06-11 9:33 ` Jan Beulich
2015-06-11 9:36 ` Tian, Kevin
0 siblings, 1 reply; 36+ messages in thread
From: Jan Beulich @ 2015-06-11 9:33 UTC (permalink / raw)
To: Kevin Tian
Cc: suravee.suthikulpanit, andrew.cooper3, tim, dietmar.hahn,
xen-devel, Aravind.Gopalakrishnan, Jun Nakajima, Boris Ostrovsky,
dgdegra
>>> On 11.06.15 at 10:38, <kevin.tian@intel.com> wrote:
>> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
>> Sent: Wednesday, June 10, 2015 11:04 PM
>>
>> Add support for handling PMU interrupts for PV(H) guests.
>>
>> VPMU for the interrupted VCPU is unloaded until the guest issues the
>> XENPMU_flush hypercall. This allows the guest to access PMU MSR values that
>> are stored in the VPMU context, which is shared between the hypervisor and
>> the domain, thus avoiding traps to the hypervisor.
>>
>> Since the interrupt handler may now force a VPMU context save (i.e. set the
>> VPMU_CONTEXT_SAVE flag), we need to make changes to amd_vpmu_save(), which
>> until now expected this flag to be set only when the counters were stopped.
>>
>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>> Acked-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
>
> I may need more time to understand the whole interrupt stuff for PV(H)
> guests. But regarding the VMX-specific changes, I think they are clear:
>
> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
I don't think you really meant S-o-b here?
Jan
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests
2015-06-11 9:33 ` Jan Beulich
@ 2015-06-11 9:36 ` Tian, Kevin
0 siblings, 0 replies; 36+ messages in thread
From: Tian, Kevin @ 2015-06-11 9:36 UTC (permalink / raw)
To: Jan Beulich
Cc: suravee.suthikulpanit, andrew.cooper3, tim, dietmar.hahn,
xen-devel, Aravind.Gopalakrishnan, Nakajima, Jun,
Boris Ostrovsky, dgdegra
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Thursday, June 11, 2015 5:33 PM
>
> >>> On 11.06.15 at 10:38, <kevin.tian@intel.com> wrote:
> >> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
> >> Sent: Wednesday, June 10, 2015 11:04 PM
> >>
> >> Add support for handling PMU interrupts for PV(H) guests.
> >>
> >> VPMU for the interrupted VCPU is unloaded until the guest issues the
> >> XENPMU_flush hypercall. This allows the guest to access PMU MSR values that
> >> are stored in the VPMU context, which is shared between the hypervisor and
> >> the domain, thus avoiding traps to the hypervisor.
> >>
> >> Since the interrupt handler may now force a VPMU context save (i.e. set the
> >> VPMU_CONTEXT_SAVE flag), we need to make changes to amd_vpmu_save(), which
> >> until now expected this flag to be set only when the counters were stopped.
> >>
> >> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> >> Acked-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
> >
> > I may need more time to understand the whole interrupt stuff for PV(H)
> > guests. But regarding the VMX-specific changes, I think they are clear:
> >
> > Signed-off-by: Kevin Tian <kevin.tian@intel.com>
>
> I don't think you really meant S-o-b here?
>
My bad when doing batch reviews.
Acked-by: Kevin Tian <kevin.tian@intel.com>
Thanks
Kevin
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests
2015-06-10 15:04 ` [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests Boris Ostrovsky
2015-06-11 8:38 ` Tian, Kevin
@ 2015-06-15 15:50 ` Jan Beulich
2015-06-15 17:17 ` Boris Ostrovsky
1 sibling, 1 reply; 36+ messages in thread
From: Jan Beulich @ 2015-06-15 15:50 UTC (permalink / raw)
To: Boris Ostrovsky
Cc: kevin.tian, suravee.suthikulpanit, andrew.cooper3, tim,
dietmar.hahn, xen-devel, Aravind.Gopalakrishnan, jun.nakajima,
dgdegra
>>> On 10.06.15 at 17:04, <boris.ostrovsky@oracle.com> wrote:
> @@ -211,27 +214,65 @@ static inline void context_load(struct vcpu *v)
> }
> }
>
> -static void amd_vpmu_load(struct vcpu *v)
> +static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
> {
> struct vpmu_struct *vpmu = vcpu_vpmu(v);
> - struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
> - uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
> + struct xen_pmu_amd_ctxt *ctxt;
> + uint64_t *ctrl_regs;
> + unsigned int i;
>
> vpmu_reset(vpmu, VPMU_FROZEN);
>
> - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
> + if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
> {
> - unsigned int i;
> + ctxt = vpmu->context;
> + ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
>
> for ( i = 0; i < num_counters; i++ )
> wrmsrl(ctrls[i], ctrl_regs[i]);
>
> - return;
> + return 0;
> + }
> +
> + if ( from_guest )
Generally I dislike such redundancy (

    if ( cond1 && cond2 )
        return;
    if ( !cond1 )
        ...

which can be written without checking cond1 twice) - do you really
think it is beneficial for overall readability to have it that way?
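E.g., a single-test shape (sketch only; the copy/verify and reload bodies
taken unchanged from the patch):

    if ( from_guest )
    {
        /* copy guest context into vpmu->context, verify it,
         * set/reset VPMU_RUNNING ... (body as in the patch) */
    }
    else if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
    {
        ctxt = vpmu->context;
        ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
        for ( i = 0; i < num_counters; i++ )
            wrmsrl(ctrls[i], ctrl_regs[i]);
        return 0;
    }

    vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
    context_load(v);
    return 0;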
> + {
> + unsigned int num_enabled = 0;
> + struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
> +
> + ASSERT(!is_hvm_vcpu(v));
> +
> + ctxt = vpmu->context;
> + ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
> +
> + memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);
So that's the live structure, not any staging area iiuc. What state
is the guest going to be in when validation fails (and you can't
restore the original state)? And what guarantees that nothing
elsewhere in the hypervisor uses the data _before_ the
validation below succeeds?
> + for ( i = 0; i < num_counters; i++ )
> + {
> + if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
> + {
> + /*
> + * Not necessary to re-init context since we should never load
> + * it until guest provides valid values. But just to be safe.
> + */
> + amd_vpmu_init_regs(ctxt);
> + return -EINVAL;
> + }
> +
> + if ( is_pmu_enabled(ctrl_regs[i]) )
> + num_enabled++;
> + }
> +
> + if ( num_enabled )
Looks like a boolean flag would do - the exact count doesn't seem to
be of interest here or in later patches?
> @@ -246,22 +287,17 @@ static inline void context_save(struct vcpu *v)
> rdmsrl(counters[i], counter_regs[i]);
> }
>
> -static int amd_vpmu_save(struct vcpu *v)
> +static int amd_vpmu_save(struct vcpu *v, bool_t to_guest)
> {
> struct vpmu_struct *vpmu = vcpu_vpmu(v);
> unsigned int i;
>
> - /*
> - * Stop the counters. If we came here via vpmu_save_force (i.e.
> - * when VPMU_CONTEXT_SAVE is set) counters are already stopped.
> - */
> + for ( i = 0; i < num_counters; i++ )
> + wrmsrl(ctrls[i], 0);
Wouldn't it make sense to retain the first sentence of the comment?
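I.e. something like (sketch):

    /* Stop the counters. */
    for ( i = 0; i < num_counters; i++ )
        wrmsrl(ctrls[i], 0);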
> @@ -478,6 +523,13 @@ int svm_vpmu_initialise(struct vcpu *v)
> vpmu->context = ctxt;
> vpmu->priv_context = NULL;
>
> + if ( !is_hvm_vcpu(v) )
> + {
> + /* Copy register offsets to shared area */
> + ASSERT(vpmu->xenpmu_data);
> + memcpy(&vpmu->xenpmu_data->pmu.c.amd, ctxt, sizeof(*ctxt));
At first glance the comment looks as if it weren't in line with
the sizeof() used; offsetof() would be more obvious here (or a
file-scope constant like you use on the Intel side).
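I.e., as a sketch of that suggestion (using the struct and field names visible
elsewhere in this patch):

    memcpy(&vpmu->xenpmu_data->pmu.c.amd, ctxt,
           offsetof(struct xen_pmu_amd_ctxt, regs));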
> @@ -552,6 +738,30 @@ long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
> pvpmu_finish(current->domain, &pmu_params);
> break;
>
> + case XENPMU_lvtpc_set:
> + xenpmu_data = current->arch.vpmu.xenpmu_data;
> + if ( xenpmu_data == NULL )
> + return -EINVAL;
> + vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
> + break;
> +
> + case XENPMU_flush:
> + curr = current;
> + vpmu = vcpu_vpmu(curr);
> + xenpmu_data = curr->arch.vpmu.xenpmu_data;
> + if ( xenpmu_data == NULL )
> + return -EINVAL;
> + xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED;
> + vpmu_reset(vpmu, VPMU_CACHED);
> + vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
> + if ( vpmu_load(curr, 1) )
> + {
> + xenpmu_data->pmu.pmu_flags |= PMU_CACHED;
> + vpmu_set(vpmu, VPMU_CACHED);
> + return -EIO;
> + }
> + break ;
> +
> default:
> ret = -EINVAL;
Considering how the default case gets handled, can't at least the 1st
and 3rd returns above become "ret = -E...", avoiding an unnecessary
number of return points in the function?
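For illustration, the first case folded into the common exit path could read
(a sketch only, assuming ret starts out as 0 in this function):

    case XENPMU_lvtpc_set:
        xenpmu_data = current->arch.vpmu.xenpmu_data;
        if ( xenpmu_data == NULL )
        {
            ret = -EINVAL;
            break;
        }
        vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
        break;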
Jan
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests
2015-06-15 15:50 ` Jan Beulich
@ 2015-06-15 17:17 ` Boris Ostrovsky
2015-06-16 7:45 ` Jan Beulich
0 siblings, 1 reply; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-15 17:17 UTC (permalink / raw)
To: Jan Beulich
Cc: kevin.tian, suravee.suthikulpanit, andrew.cooper3, tim,
dietmar.hahn, xen-devel, Aravind.Gopalakrishnan, jun.nakajima,
dgdegra
On 06/15/2015 11:50 AM, Jan Beulich wrote:
>>>> On 10.06.15 at 17:04, <boris.ostrovsky@oracle.com> wrote:
>> @@ -211,27 +214,65 @@ static inline void context_load(struct vcpu *v)
>> }
>> }
>>
>> -static void amd_vpmu_load(struct vcpu *v)
>> +static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
>> {
>> struct vpmu_struct *vpmu = vcpu_vpmu(v);
>> - struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
>> - uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
>> + struct xen_pmu_amd_ctxt *ctxt;
>> + uint64_t *ctrl_regs;
>> + unsigned int i;
>>
>> vpmu_reset(vpmu, VPMU_FROZEN);
>>
>> - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
>> + if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
>> {
>> - unsigned int i;
>> + ctxt = vpmu->context;
>> + ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
>>
>> for ( i = 0; i < num_counters; i++ )
>> wrmsrl(ctrls[i], ctrl_regs[i]);
>>
>> - return;
>> + return 0;
>> + }
>> +
>> + if ( from_guest )
> Generally I dislike such redundancy (
> if ( cond1 && cond2 )
> return;
> if ( !cond1 )
> ...
> which can be written without checking cond1 twice) - do you really
> think it is beneficial for overall readability to have it that way?
I thought it was more readable this way: the first clause means we are in
a quick VPMU load from a context switch (and so we return from it),
while the second is part of a full VPMU load.
>
>> + {
>> + unsigned int num_enabled = 0;
>> + struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
>> +
>> + ASSERT(!is_hvm_vcpu(v));
>> +
>> + ctxt = vpmu->context;
>> + ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
>> +
>> + memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);
> So that's the live structure, not any staging area iiuc. What state
> is the guest going to be in when validation fails (and you can't
> restore the original state)? And what guarantees that nothing
> elsewhere in the hypervisor uses the data _before_ the
> validation below succeeds?
We don't load this data into the hardware until we have validated it. On
failed validation the guest will receive a hypercall error; it's up to the
guest to decide what to do.
The hypervisor will not use this data either, as it will still be flagged as
PMU_CACHED, i.e. invalid/stale. (That's why I say in the comment below
that re-initializing it is really not necessary.)
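As a minimal sketch of the gating relied on here (illustrative, not a quote
from the series):

    /* Wherever the hypervisor would otherwise consume the shared context: */
    if ( vpmu_is_set(vpmu, VPMU_CACHED) )
        return;    /* context is stale/unvalidated until XENPMU_flush */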
>
>> + for ( i = 0; i < num_counters; i++ )
>> + {
>> + if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
>> + {
>> + /*
>> + * Not necessary to re-init context since we should never load
>> + * it until guest provides valid values. But just to be safe.
>> + */
>> + amd_vpmu_init_regs(ctxt);
>> + return -EINVAL;
>> + }
>> +
>> + if ( is_pmu_enabled(ctrl_regs[i]) )
>> + num_enabled++;
>> + }
>> +
>> + if ( num_enabled )
> Looks like a boolean flag would do - the exact count doesn't seem to
> be of interest here or in later patches?
The reason I use a counter here is that keeping track of the VPMU_RUNNING
state is currently broken on AMD; I noticed it while updating this patch.
amd_vpmu_do_wrmsr() will reset VPMU_RUNNING if the MSR write is disabling
the current counter, even though there may still be other counters running.
This may be related to the HVM brokenness of AMD counters that I mentioned
a while ago, where a guest running with multiple counters sometimes gets
unexpected NMIs. (This goes back all the way to 4.1.)
I don't want to fix this in this series, but I will likely need to count
the number of active counters when I do (just like I do for Intel).
I can use a boolean now though, since I am not dealing with this problem
here.
-boris
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests
2015-06-15 17:17 ` Boris Ostrovsky
@ 2015-06-16 7:45 ` Jan Beulich
2015-06-16 14:14 ` Boris Ostrovsky
0 siblings, 1 reply; 36+ messages in thread
From: Jan Beulich @ 2015-06-16 7:45 UTC (permalink / raw)
To: Boris Ostrovsky
Cc: kevin.tian, suravee.suthikulpanit, andrew.cooper3, tim,
dietmar.hahn, xen-devel, Aravind.Gopalakrishnan, jun.nakajima,
dgdegra
>>> On 15.06.15 at 19:17, <boris.ostrovsky@oracle.com> wrote:
> On 06/15/2015 11:50 AM, Jan Beulich wrote:
>>>>> On 10.06.15 at 17:04, <boris.ostrovsky@oracle.com> wrote:
>>> + {
>>> + unsigned int num_enabled = 0;
>>> + struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
>>> +
>>> + ASSERT(!is_hvm_vcpu(v));
>>> +
>>> + ctxt = vpmu->context;
>>> + ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
>>> +
>>> + memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);
>> So that's the live structure, not any staging area iiuc. What state
>> is the guest going to be in when validation fails (and you can't
>> restore the original state)? And what guarantees that nothing
>> elsewhere in the hypervisor uses the data _before_ the
>> validation below succeeds?
>
>
> We don't load this data into the hardware until we have validated it. On
> failed validation the guest will receive a hypercall error; it's up to the
> guest to decide what to do.
>
> The hypervisor will not use this data either, as it will still be flagged as
> PMU_CACHED, i.e. invalid/stale. (That's why I say in the comment below
> that re-initializing it is really not necessary.)
Okay, thanks.
>>> + for ( i = 0; i < num_counters; i++ )
>>> + {
>>> + if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
>>> + {
>>> + /*
>>> + * Not necessary to re-init context since we should never load
>>> + * it until guest provides valid values. But just to be safe.
>>> + */
>>> + amd_vpmu_init_regs(ctxt);
>>> + return -EINVAL;
>>> + }
>>> +
>>> + if ( is_pmu_enabled(ctrl_regs[i]) )
>>> + num_enabled++;
>>> + }
>>> +
>>> + if ( num_enabled )
>> Looks like a boolean flag would do - the exact count doesn't seem to
>> be of interest here or in later patches?
>
> The reason I use a counter here is that keeping track of the VPMU_RUNNING
> state is currently broken on AMD; I noticed it while updating this patch.
> amd_vpmu_do_wrmsr() will reset VPMU_RUNNING if the MSR write is disabling
> the current counter, even though there may still be other counters running.
> This may be related to the HVM brokenness of AMD counters that I mentioned
> a while ago, where a guest running with multiple counters sometimes gets
> unexpected NMIs. (This goes back all the way to 4.1.)
>
> I don't want to fix this in this series, but I will likely need to count
> the number of active counters when I do (just like I do for Intel).
>
> I can use a boolean now though, since I am not dealing with this problem
> here.
If another rev is needed, I'd prefer if you did so. But if we can have
this version go in (provided we get all the necessary acks), I wouldn't
insist on you doing another round just because of this.
Jan
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests
2015-06-16 7:45 ` Jan Beulich
@ 2015-06-16 14:14 ` Boris Ostrovsky
0 siblings, 0 replies; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-16 14:14 UTC (permalink / raw)
To: Jan Beulich
Cc: kevin.tian, suravee.suthikulpanit, andrew.cooper3, tim,
dietmar.hahn, xen-devel, Aravind.Gopalakrishnan, jun.nakajima,
dgdegra
On 06/16/2015 03:45 AM, Jan Beulich wrote:
>>>> On 15.06.15 at 19:17, <boris.ostrovsky@oracle.com> wrote:
>>>>
>>>> + for ( i = 0; i < num_counters; i++ )
>>>> + {
>>>> + if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
>>>> + {
>>>> + /*
>>>> + * Not necessary to re-init context since we should never load
>>>> + * it until guest provides valid values. But just to be safe.
>>>> + */
>>>> + amd_vpmu_init_regs(ctxt);
>>>> + return -EINVAL;
>>>> + }
>>>> +
>>>> + if ( is_pmu_enabled(ctrl_regs[i]) )
>>>> + num_enabled++;
>>>> + }
>>>> +
>>>> + if ( num_enabled )
>>> Looks like a boolean flag would do - the exact count doesn't seem to
>>> be of interest here or in later patches?
>> The reason I use a counter here is that keeping track of the VPMU_RUNNING
>> state is currently broken on AMD; I noticed it while updating this patch.
>> amd_vpmu_do_wrmsr() will reset VPMU_RUNNING if the MSR write is disabling
>> the current counter, even though there may still be other counters running.
>> This may be related to the HVM brokenness of AMD counters that I mentioned
>> a while ago, where a guest running with multiple counters sometimes gets
>> unexpected NMIs. (This goes back all the way to 4.1.)
>>
>> I don't want to fix this in this series, but I will likely need to count
>> the number of active counters when I do (just like I do for Intel).
>>
>> I can use a boolean now though, since I am not dealing with this problem
>> here.
> If another rev is needed, I'd prefer if you did so. But if we can have
> this version go in (provided we get all the necessary acks), I wouldn't
> insist on you doing another round just because of this.
I think there are a couple of (fairly cosmetic) changes that need to be
made, so there will be another rev.
OTOH, I just tried a quick-and-dirty fix for this problem and it doesn't
resolve it, so presumably there is more to this. It's not particularly
invasive, but I think it would be rather pointless to put it in, as it
still doesn't allow multiple counters on AMD and I suspect the final fix
will touch the same code again.
-boris
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v24 13/15] x86/VPMU: Merge vpmu_rdmsr and vpmu_wrmsr
2015-06-10 15:04 [PATCH v24 00/15] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
` (11 preceding siblings ...)
2015-06-10 15:04 ` [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests Boris Ostrovsky
@ 2015-06-10 15:04 ` Boris Ostrovsky
2015-06-10 15:04 ` [PATCH v24 14/15] x86/VPMU: Add privileged PMU mode Boris Ostrovsky
2015-06-10 15:04 ` [PATCH v24 15/15] x86/VPMU: Move VPMU files up from hvm/ directory Boris Ostrovsky
14 siblings, 0 replies; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-10 15:04 UTC (permalink / raw)
To: JBeulich, kevin.tian, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: boris.ostrovsky, tim, jun.nakajima, xen-devel
The two routines share most of their logic.
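The old entry points remain as thin inline wrappers around vpmu_do_msr(), so
existing call sites keep their shape. For example, the privileged-op emulation
paths (a sketch of the callers in traps.c):

    if ( vpmu_do_wrmsr(regs->ecx, msr_content, 0) ) /* vpmu_do_msr(..., 1) */
        goto fail;

    if ( vpmu_do_rdmsr(regs->ecx, &val) )           /* vpmu_do_msr(..., 0) */
        goto fail;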
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
---
xen/arch/x86/hvm/vpmu.c | 75 ++++++++++++++++--------------------------
xen/include/asm-x86/hvm/vpmu.h | 14 ++++++--
2 files changed, 41 insertions(+), 48 deletions(-)
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index 37e541b..fc1b228 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -100,63 +100,46 @@ void vpmu_lvtpc_update(uint32_t val)
apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
}
-int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported)
+int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
+ uint64_t supported, bool_t is_write)
{
struct vcpu *curr = current;
struct vpmu_struct *vpmu;
+ const struct arch_vpmu_ops *ops;
+ int ret = 0;
- if ( vpmu_mode == XENPMU_MODE_OFF )
- return 0;
+ if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
+ goto nop;
vpmu = vcpu_vpmu(curr);
- if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
- {
- int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
-
- /*
- * We may have received a PMU interrupt during WRMSR handling
- * and since do_wrmsr may load VPMU context we should save
- * (and unload) it again.
- */
- if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
- vpmu_is_set(vpmu, VPMU_CACHED) )
- {
- vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
- vpmu->arch_vpmu_ops->arch_vpmu_save(curr, 0);
- vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
- }
- return ret;
- }
-
- return 0;
-}
-
-int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
-{
- struct vcpu *curr = current;
- struct vpmu_struct *vpmu;
+ ops = vpmu->arch_vpmu_ops;
+ if ( !ops )
+ goto nop;
+
+ if ( is_write && ops->do_wrmsr )
+ ret = ops->do_wrmsr(msr, *msr_content, supported);
+ else if ( !is_write && ops->do_rdmsr )
+ ret = ops->do_rdmsr(msr, msr_content);
+ else
+ goto nop;
- if ( vpmu_mode == XENPMU_MODE_OFF )
+ /*
+ * We may have received a PMU interrupt while handling MSR access
+ * and since do_wr/rdmsr may load VPMU context we should save
+ * (and unload) it again.
+ */
+ if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
+ vpmu_is_set(vpmu, VPMU_CACHED) )
{
- *msr_content = 0;
- return 0;
+ vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+ ops->arch_vpmu_save(curr, 0);
+ vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
}
- vpmu = vcpu_vpmu(curr);
- if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
- {
- int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
+ return ret;
- if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
- vpmu_is_set(vpmu, VPMU_CACHED) )
- {
- vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
- vpmu->arch_vpmu_ops->arch_vpmu_save(curr, 0);
- vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
- }
- return ret;
- }
- else
+ nop:
+ if ( !is_write )
*msr_content = 0;
return 0;
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index f486d2f..212e496 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -101,8 +101,8 @@ static inline bool_t vpmu_are_all_set(const struct vpmu_struct *vpmu,
}
void vpmu_lvtpc_update(uint32_t val);
-int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported);
-int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content);
+int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
+ uint64_t supported, bool_t is_write);
void vpmu_do_interrupt(struct cpu_user_regs *regs);
void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
@@ -112,6 +112,16 @@ void vpmu_save(struct vcpu *v);
int vpmu_load(struct vcpu *v, bool_t from_guest);
void vpmu_dump(struct vcpu *v);
+static inline int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
+ uint64_t supported)
+{
+ return vpmu_do_msr(msr, &msr_content, supported, 1);
+}
+static inline int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
+{
+ return vpmu_do_msr(msr, msr_content, 0, 0);
+}
+
extern int acquire_pmu_ownership(int pmu_ownership);
extern void release_pmu_ownership(int pmu_ownership);
--
1.8.1.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v24 14/15] x86/VPMU: Add privileged PMU mode
2015-06-10 15:04 [PATCH v24 00/15] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
` (12 preceding siblings ...)
2015-06-10 15:04 ` [PATCH v24 13/15] x86/VPMU: Merge vpmu_rdmsr and vpmu_wrmsr Boris Ostrovsky
@ 2015-06-10 15:04 ` Boris Ostrovsky
2015-06-10 15:04 ` [PATCH v24 15/15] x86/VPMU: Move VPMU files up from hvm/ directory Boris Ostrovsky
14 siblings, 0 replies; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-10 15:04 UTC (permalink / raw)
To: JBeulich, kevin.tian, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: boris.ostrovsky, tim, jun.nakajima, xen-devel
Add support for privileged PMU mode (XENPMU_MODE_ALL), which allows the
privileged domain (dom0) to profile both itself (and the hypervisor) and the
guests. While this mode is on, profiling in the guests is disabled.
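For illustration, a 64-bit dom0 would select this mode roughly as follows (a
minimal sketch assuming a Linux-style HYPERVISOR_xenpmu_op() hypercall
wrapper; only the public/pmu.h names are defined by this series):

    #include <public/pmu.h>

    struct xen_pmu_params p = {
        .version.maj = XENPMU_VER_MAJ,
        .version.min = XENPMU_VER_MIN,
        .val = XENPMU_MODE_ALL,
    };

    /* Fails with -EINVAL for a 32-bit dom0, which can only sample itself. */
    ret = HYPERVISOR_xenpmu_op(XENPMU_mode_set, &p);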
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/hvm/vpmu.c | 40 +++++++++++++++++++++++++++++-----------
xen/arch/x86/traps.c | 13 +++++++++++++
xen/include/public/pmu.h | 3 +++
3 files changed, 45 insertions(+), 11 deletions(-)
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index fc1b228..0eda20a 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -108,8 +108,10 @@ int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
const struct arch_vpmu_ops *ops;
int ret = 0;
- if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
- goto nop;
+ if ( likely(vpmu_mode == XENPMU_MODE_OFF) ||
+ ((vpmu_mode & XENPMU_MODE_ALL) &&
+ !is_hardware_domain(current->domain)) )
+ goto nop;
vpmu = vcpu_vpmu(curr);
ops = vpmu->arch_vpmu_ops;
@@ -164,8 +166,12 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
struct vlapic *vlapic;
u32 vlapic_lvtpc;
- /* dom0 will handle interrupt for special domains (e.g. idle domain) */
- if ( sampled->domain->domain_id >= DOMID_FIRST_RESERVED )
+ /*
+ * dom0 will handle interrupt for special domains (e.g. idle domain) or,
+ * in XENPMU_MODE_ALL, for everyone.
+ */
+ if ( (vpmu_mode & XENPMU_MODE_ALL) ||
+ (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
{
sampling = choose_hwdom_vcpu();
if ( !sampling )
@@ -179,16 +185,17 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
return;
/* PV(H) guest */
- if ( !is_hvm_vcpu(sampling) )
+ if ( !is_hvm_vcpu(sampling) || (vpmu_mode & XENPMU_MODE_ALL) )
{
const struct cpu_user_regs *cur_regs;
uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
- domid_t domid = DOMID_SELF;
+ domid_t domid;
if ( !vpmu->xenpmu_data )
return;
if ( is_pvh_vcpu(sampling) &&
+ !(vpmu_mode & XENPMU_MODE_ALL) &&
!vpmu->arch_vpmu_ops->do_interrupt(regs) )
return;
@@ -205,6 +212,11 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
else
*flags = PMU_SAMPLE_PV;
+ if ( sampled == sampling )
+ domid = DOMID_SELF;
+ else
+ domid = sampled->domain->domain_id;
+
/* Store appropriate registers in xenpmu_data */
/* FIXME: 32-bit PVH should go here as well */
if ( is_pv_32bit_vcpu(sampling) )
@@ -233,7 +245,8 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
if ( (vpmu_mode & XENPMU_MODE_SELF) )
cur_regs = guest_cpu_user_regs();
- else if ( !guest_mode(regs) && is_hardware_domain(sampling->domain) )
+ else if ( !guest_mode(regs) &&
+ is_hardware_domain(sampling->domain) )
{
cur_regs = regs;
domid = DOMID_XEN;
@@ -472,7 +485,9 @@ void vpmu_initialise(struct vcpu *v)
printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);
/* Intel needs to initialize VPMU ops even if VPMU is not in use */
- if ( !is_priv_vpmu && (ret || (vpmu_mode == XENPMU_MODE_OFF)) )
+ if ( !is_priv_vpmu &&
+ (ret || (vpmu_mode == XENPMU_MODE_OFF) ||
+ (vpmu_mode == XENPMU_MODE_ALL)) )
{
spin_lock(&vpmu_lock);
vpmu_count--;
@@ -525,7 +540,8 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
struct page_info *page;
uint64_t gfn = params->val;
- if ( vpmu_mode == XENPMU_MODE_OFF )
+ if ( (vpmu_mode == XENPMU_MODE_OFF) ||
+ ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) )
return -EINVAL;
if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
@@ -645,12 +661,14 @@ long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
{
case XENPMU_mode_set:
{
- if ( (pmu_params.val & ~(XENPMU_MODE_SELF | XENPMU_MODE_HV)) ||
+ if ( (pmu_params.val &
+ ~(XENPMU_MODE_SELF | XENPMU_MODE_HV | XENPMU_MODE_ALL)) ||
(hweight64(pmu_params.val) > 1) )
return -EINVAL;
/* 32-bit dom0 can only sample itself. */
- if ( is_pv_32bit_vcpu(current) && (pmu_params.val & XENPMU_MODE_HV) )
+ if ( is_pv_32bit_vcpu(current) &&
+ (pmu_params.val & (XENPMU_MODE_HV | XENPMU_MODE_ALL)) )
return -EINVAL;
spin_lock(&vpmu_lock);
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 0b26100..4a79931 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2656,6 +2656,10 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
case MSR_AMD_FAM15H_EVNTSEL0...MSR_AMD_FAM15H_PERFCTR5:
if ( vpmu_msr || (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
{
+ if ( (vpmu_mode & XENPMU_MODE_ALL) &&
+ !is_hardware_domain(v->domain) )
+ break;
+
if ( vpmu_do_wrmsr(regs->ecx, msr_content, 0) )
goto fail;
}
@@ -2779,6 +2783,15 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
case MSR_AMD_FAM15H_EVNTSEL0...MSR_AMD_FAM15H_PERFCTR5:
if ( vpmu_msr || (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
{
+
+ if ( (vpmu_mode & XENPMU_MODE_ALL) &&
+ !is_hardware_domain(v->domain) )
+ {
+ /* Don't leak PMU MSRs to unprivileged domains */
+ regs->eax = regs->edx = 0;
+ break;
+ }
+
if ( vpmu_do_rdmsr(regs->ecx, &val) )
goto fail;
diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
index 7a45783..1149678 100644
--- a/xen/include/public/pmu.h
+++ b/xen/include/public/pmu.h
@@ -52,10 +52,13 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_params_t);
* - XENPMU_MODE_SELF: Guests can profile themselves
* - XENPMU_MODE_HV: Guests can profile themselves, dom0 profiles
* itself and Xen
+ * - XENPMU_MODE_ALL: Only dom0 has access to VPMU and it profiles
+ * everyone: itself, the hypervisor and the guests.
*/
#define XENPMU_MODE_OFF 0
#define XENPMU_MODE_SELF (1<<0)
#define XENPMU_MODE_HV (1<<1)
+#define XENPMU_MODE_ALL (1<<2)
/*
* PMU features:
--
1.8.1.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v24 15/15] x86/VPMU: Move VPMU files up from hvm/ directory
2015-06-10 15:04 [PATCH v24 00/15] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
` (13 preceding siblings ...)
2015-06-10 15:04 ` [PATCH v24 14/15] x86/VPMU: Add privileged PMU mode Boris Ostrovsky
@ 2015-06-10 15:04 ` Boris Ostrovsky
2015-06-11 8:39 ` Tian, Kevin
14 siblings, 1 reply; 36+ messages in thread
From: Boris Ostrovsky @ 2015-06-10 15:04 UTC (permalink / raw)
To: JBeulich, kevin.tian, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: boris.ostrovsky, tim, jun.nakajima, xen-devel
Since the PMU is no longer HVM-specific, we can move the VPMU-related files
up out of the arch/x86/hvm/ directory.
Specifically:
arch/x86/hvm/vpmu.c -> arch/x86/cpu/vpmu.c
arch/x86/hvm/svm/vpmu.c -> arch/x86/cpu/vpmu_amd.c
arch/x86/hvm/vmx/vpmu_core2.c -> arch/x86/cpu/vpmu_intel.c
include/asm-x86/hvm/vpmu.h -> include/asm-x86/vpmu.h
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
Tested-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
---
xen/arch/x86/cpu/Makefile | 1 +
xen/arch/x86/{hvm => cpu}/vpmu.c | 2 +-
xen/arch/x86/{hvm/svm/vpmu.c => cpu/vpmu_amd.c} | 2 +-
xen/arch/x86/{hvm/vmx/vpmu_core2.c => cpu/vpmu_intel.c} | 2 +-
xen/arch/x86/hvm/Makefile | 1 -
xen/arch/x86/hvm/svm/Makefile | 1 -
xen/arch/x86/hvm/vlapic.c | 2 +-
xen/arch/x86/hvm/vmx/Makefile | 1 -
xen/arch/x86/oprofile/op_model_ppro.c | 2 +-
xen/arch/x86/traps.c | 2 +-
xen/include/asm-x86/hvm/vmx/vmcs.h | 2 +-
xen/include/asm-x86/{hvm => }/vpmu.h | 0
12 files changed, 8 insertions(+), 10 deletions(-)
rename xen/arch/x86/{hvm => cpu}/vpmu.c (99%)
rename xen/arch/x86/{hvm/svm/vpmu.c => cpu/vpmu_amd.c} (99%)
rename xen/arch/x86/{hvm/vmx/vpmu_core2.c => cpu/vpmu_intel.c} (99%)
rename xen/include/asm-x86/{hvm => }/vpmu.h (100%)
diff --git a/xen/arch/x86/cpu/Makefile b/xen/arch/x86/cpu/Makefile
index d73d93a..74f23ae 100644
--- a/xen/arch/x86/cpu/Makefile
+++ b/xen/arch/x86/cpu/Makefile
@@ -7,3 +7,4 @@ obj-y += common.o
obj-y += intel.o
obj-y += intel_cacheinfo.o
obj-y += mwait-idle.o
+obj-y += vpmu.o vpmu_amd.o vpmu_intel.o
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/cpu/vpmu.c
similarity index 99%
rename from xen/arch/x86/hvm/vpmu.c
rename to xen/arch/x86/cpu/vpmu.c
index 0eda20a..7137033 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/cpu/vpmu.c
@@ -28,10 +28,10 @@
#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/p2m.h>
+#include <asm/vpmu.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
-#include <asm/hvm/vpmu.h>
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/apic.h>
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/cpu/vpmu_amd.c
similarity index 99%
rename from xen/arch/x86/hvm/svm/vpmu.c
rename to xen/arch/x86/cpu/vpmu_amd.c
index b93d31d..759cd06 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/cpu/vpmu_amd.c
@@ -28,8 +28,8 @@
#include <xen/sched.h>
#include <xen/irq.h>
#include <asm/apic.h>
+#include <asm/vpmu.h>
#include <asm/hvm/vlapic.h>
-#include <asm/hvm/vpmu.h>
#include <public/pmu.h>
#define MSR_F10H_EVNTSEL_GO_SHIFT 40
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/cpu/vpmu_intel.c
similarity index 99%
rename from xen/arch/x86/hvm/vmx/vpmu_core2.c
rename to xen/arch/x86/cpu/vpmu_intel.c
index 1206e90..423ca82 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/cpu/vpmu_intel.c
@@ -30,6 +30,7 @@
#include <asm/traps.h>
#include <asm/msr.h>
#include <asm/msr-index.h>
+#include <asm/vpmu.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/vmx/vmx.h>
@@ -37,7 +38,6 @@
#include <public/sched.h>
#include <public/hvm/save.h>
#include <public/pmu.h>
-#include <asm/hvm/vpmu.h>
/*
* See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID
diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
index 69af47f..794e793 100644
--- a/xen/arch/x86/hvm/Makefile
+++ b/xen/arch/x86/hvm/Makefile
@@ -23,4 +23,3 @@ obj-y += vlapic.o
obj-y += vmsi.o
obj-y += vpic.o
obj-y += vpt.o
-obj-y += vpmu.o
diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile
index a10a55e..760d295 100644
--- a/xen/arch/x86/hvm/svm/Makefile
+++ b/xen/arch/x86/hvm/svm/Makefile
@@ -6,4 +6,3 @@ obj-y += nestedsvm.o
obj-y += svm.o
obj-y += svmdebug.o
obj-y += vmcb.o
-obj-y += vpmu.o
diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
index 92b0fa8..e03d670 100644
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -33,12 +33,12 @@
#include <asm/page.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
+#include <asm/vpmu.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/io.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/nestedhvm.h>
-#include <asm/hvm/vpmu.h>
#include <public/hvm/ioreq.h>
#include <public/hvm/params.h>
diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile
index 373b3d9..04a29ce 100644
--- a/xen/arch/x86/hvm/vmx/Makefile
+++ b/xen/arch/x86/hvm/vmx/Makefile
@@ -3,5 +3,4 @@ obj-y += intr.o
obj-y += realmode.o
obj-y += vmcs.o
obj-y += vmx.o
-obj-y += vpmu_core2.o
obj-y += vvmx.o
diff --git a/xen/arch/x86/oprofile/op_model_ppro.c b/xen/arch/x86/oprofile/op_model_ppro.c
index ca429a1..89649d0 100644
--- a/xen/arch/x86/oprofile/op_model_ppro.c
+++ b/xen/arch/x86/oprofile/op_model_ppro.c
@@ -19,7 +19,7 @@
#include <asm/processor.h>
#include <asm/regs.h>
#include <asm/current.h>
-#include <asm/hvm/vpmu.h>
+#include <asm/vpmu.h>
#include "op_x86_model.h"
#include "op_counter.h"
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 4a79931..46ea81b 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -72,7 +72,7 @@
#include <asm/apic.h>
#include <asm/mc146818rtc.h>
#include <asm/hpet.h>
-#include <asm/hvm/vpmu.h>
+#include <asm/vpmu.h>
#include <public/arch-x86/cpuid.h>
#include <xsm/xsm.h>
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 1104bda..3132644 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -19,8 +19,8 @@
#ifndef __ASM_X86_HVM_VMX_VMCS_H__
#define __ASM_X86_HVM_VMX_VMCS_H__
+#include <asm/vpmu.h>
#include <asm/hvm/io.h>
-#include <asm/hvm/vpmu.h>
#include <irq_vectors.h>
extern void vmcs_dump_vcpu(struct vcpu *v);
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/vpmu.h
similarity index 100%
rename from xen/include/asm-x86/hvm/vpmu.h
rename to xen/include/asm-x86/vpmu.h
--
1.8.1.4
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH v24 15/15] x86/VPMU: Move VPMU files up from hvm/ directory
2015-06-10 15:04 ` [PATCH v24 15/15] x86/VPMU: Move VPMU files up from hvm/ directory Boris Ostrovsky
@ 2015-06-11 8:39 ` Tian, Kevin
0 siblings, 0 replies; 36+ messages in thread
From: Tian, Kevin @ 2015-06-11 8:39 UTC (permalink / raw)
To: Boris Ostrovsky, JBeulich, suravee.suthikulpanit,
Aravind.Gopalakrishnan, dietmar.hahn, dgdegra, andrew.cooper3
Cc: tim, Nakajima, Jun, xen-devel
> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
> Sent: Wednesday, June 10, 2015 11:05 PM
>
> Since the PMU is no longer HVM-specific, we can move the VPMU-related files
> up out of the arch/x86/hvm/ directory.
>
> Specifically:
> arch/x86/hvm/vpmu.c -> arch/x86/cpu/vpmu.c
> arch/x86/hvm/svm/vpmu.c -> arch/x86/cpu/vpmu_amd.c
> arch/x86/hvm/vmx/vpmu_core2.c -> arch/x86/cpu/vpmu_intel.c
> include/asm-x86/hvm/vpmu.h -> include/asm-x86/vpmu.h
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> Acked-by: Jan Beulich <jbeulich@suse.com>
> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> Reviewed-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
> Tested-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
^ permalink raw reply [flat|nested] 36+ messages in thread