* [PATCH v2 1/6] xen: xensyms support
2014-06-06 17:44 [PATCH v2 0/6] xen/PMU: PMU support for Xen PV guests Boris Ostrovsky
@ 2014-06-06 17:44 ` Boris Ostrovsky
2014-06-10 13:31 ` David Vrabel
2014-06-11 9:37 ` Dietmar Hahn
2014-06-06 17:44 ` [PATCH v2 2/6] xen/PMU: Sysfs interface for setting Xen PMU mode Boris Ostrovsky
` (5 subsequent siblings)
6 siblings, 2 replies; 30+ messages in thread
From: Boris Ostrovsky @ 2014-06-06 17:44 UTC (permalink / raw)
To: david.vrabel, konrad.wilk
Cc: boris.ostrovsky, kevin.tian, dietmar.hahn, JBeulich, xen-devel
Export Xen symbols to dom0 via /proc/xen/xensyms (similar to /proc/kallsyms).
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
drivers/xen/Kconfig | 5 ++
drivers/xen/xenfs/Makefile | 1 +
drivers/xen/xenfs/super.c | 3 +
drivers/xen/xenfs/xenfs.h | 1 +
drivers/xen/xenfs/xensyms.c | 124 +++++++++++++++++++++++++++++++++++++++
include/xen/interface/platform.h | 19 ++++++
6 files changed, 153 insertions(+)
create mode 100644 drivers/xen/xenfs/xensyms.c
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 38fb36e..b3d1da7 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -240,4 +240,9 @@ config XEN_MCE_LOG
config XEN_HAVE_PVMMU
bool
+config XEN_SYMS
+ bool "Xen symbols"
+ depends on XEN_DOM0 && XENFS
+ default y if KALLSYMS
+
endmenu
diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
index b019865..1a83010 100644
--- a/drivers/xen/xenfs/Makefile
+++ b/drivers/xen/xenfs/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_XENFS) += xenfs.o
xenfs-y = super.o
xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
+xenfs-$(CONFIG_XEN_SYMS) += xensyms.o
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 06092e0..8559a71 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -57,6 +57,9 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
{ "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
{ "xsd_kva", &xsd_kva_file_ops, S_IRUSR|S_IWUSR},
{ "xsd_port", &xsd_port_file_ops, S_IRUSR|S_IWUSR},
+#ifdef CONFIG_XEN_SYMS
+ { "xensyms", &xensyms_ops, S_IRUSR},
+#endif
{""},
};
diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
index 6b80c77..2c5934e 100644
--- a/drivers/xen/xenfs/xenfs.h
+++ b/drivers/xen/xenfs/xenfs.h
@@ -3,5 +3,6 @@
extern const struct file_operations xsd_kva_file_ops;
extern const struct file_operations xsd_port_file_ops;
+extern const struct file_operations xensyms_ops;
#endif /* _XENFS_XENBUS_H */
diff --git a/drivers/xen/xenfs/xensyms.c b/drivers/xen/xenfs/xensyms.c
new file mode 100644
index 0000000..748948c
--- /dev/null
+++ b/drivers/xen/xenfs/xensyms.c
@@ -0,0 +1,124 @@
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <xen/interface/platform.h>
+#include <asm/xen/hypercall.h>
+#include <xen/xen-ops.h>
+#include "xenfs.h"
+
+
+#define XEN_KSYM_NAME_LEN 127 /* Hypervisor may have different name length */
+
+
+/* Grab next output page from the hypervisor */
+static int xensyms_next_sym(struct xen_platform_op *op)
+{
+ int ret;
+ uint64_t symnum = op->u.symdata.symnum;
+
+ memset(op->u.symdata.name, 0, XEN_KSYM_NAME_LEN + 1);
+
+ ret = HYPERVISOR_dom0_op(op);
+ if (ret < 0)
+ return ret;
+
+ if (op->u.symdata.symnum == symnum)
+ /* End of symbols */
+ return 1;
+
+ return 0;
+}
+
+static void *xensyms_start(struct seq_file *m, loff_t *pos)
+{
+ struct xen_platform_op *op = (struct xen_platform_op *)m->private;
+
+ if (op->u.symdata.symnum != *pos)
+ op->u.symdata.symnum = *pos;
+
+ if (xensyms_next_sym(op))
+ return NULL;
+
+ return m->private;
+}
+
+static void *xensyms_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ struct xen_platform_op *op = (struct xen_platform_op *)m->private;
+
+ (*pos)++;
+
+ if (op->u.symdata.symnum != *pos)
+ op->u.symdata.symnum = *pos;
+
+ if (xensyms_next_sym(op))
+ return NULL;
+
+ return p;
+}
+
+static int xensyms_show(struct seq_file *m, void *p)
+{
+ struct xen_platform_op *op = (struct xen_platform_op *)m->private;
+
+ seq_printf(m, "%016llx %c %s\n", op->u.symdata.address,
+ op->u.symdata.type, op->u.symdata.name);
+
+ return 0;
+}
+
+static void xensyms_stop(struct seq_file *m, void *p)
+{
+}
+
+static const struct seq_operations xensyms_seq_ops = {
+ .start = xensyms_start,
+ .next = xensyms_next,
+ .show = xensyms_show,
+ .stop = xensyms_stop,
+};
+
+static int xensyms_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *m;
+ struct xen_platform_op *op;
+ char *buf;
+ int ret;
+
+ buf = kzalloc(XEN_KSYM_NAME_LEN + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ ret = seq_open_private(file, &xensyms_seq_ops,
+ sizeof(struct xen_platform_op));
+ if (ret)
+ return ret;
+
+ m = file->private_data;
+ op = (struct xen_platform_op *)m->private;
+ set_xen_guest_handle(op->u.symdata.name, buf);
+ op->cmd = XENPF_get_symbol;
+ op->u.symdata.namelen = XEN_KSYM_NAME_LEN + 1;
+
+ return 0;
+}
+
+static int xensyms_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = file->private_data;
+ struct xen_platform_op *op = (struct xen_platform_op *)m->private;
+
+ kfree(op->u.symdata.name);
+ return seq_release_private(inode, file);
+}
+
+const struct file_operations xensyms_ops = {
+ .open = xensyms_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = xensyms_release
+};
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index f1331e3..792e29a 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -352,6 +352,24 @@ struct xenpf_core_parking {
};
DEFINE_GUEST_HANDLE_STRUCT(xenpf_core_parking);
+#define XENPF_get_symbol 61
+struct xenpf_symdata {
+ /* IN/OUT variables */
+ uint32_t namelen; /* size of 'name' buffer */
+
+ /* IN/OUT variables */
+ uint32_t symnum; /* IN: Symbol to read */
+ /* OUT: Next available symbol. If same as IN then */
+ /* we reached the end */
+
+ /* OUT variables */
+ char type;
+ uint64_t address;
+ GUEST_HANDLE(char) name;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_symdata);
+
+
struct xen_platform_op {
uint32_t cmd;
uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -372,6 +390,7 @@ struct xen_platform_op {
struct xenpf_cpu_hotadd cpu_add;
struct xenpf_mem_hotadd mem_add;
struct xenpf_core_parking core_parking;
+ struct xenpf_symdata symdata;
uint8_t pad[128];
} u;
};
--
1.8.1.4
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH v2 1/6] xen: xensyms support
2014-06-06 17:44 ` [PATCH v2 1/6] xen: xensyms support Boris Ostrovsky
@ 2014-06-10 13:31 ` David Vrabel
2014-06-10 14:49 ` Boris Ostrovsky
2014-06-11 9:37 ` Dietmar Hahn
1 sibling, 1 reply; 30+ messages in thread
From: David Vrabel @ 2014-06-10 13:31 UTC (permalink / raw)
To: Boris Ostrovsky, konrad.wilk
Cc: kevin.tian, dietmar.hahn, JBeulich, xen-devel
On 06/06/14 18:44, Boris Ostrovsky wrote:
> Export Xen symbols to dom0 via /proc/xen/xensyms (similar to /proc/kallsyms).
[...]
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -240,4 +240,9 @@ config XEN_MCE_LOG
> config XEN_HAVE_PVMMU
> bool
>
> +config XEN_SYMS
> + bool "Xen symbols"
> + depends on XEN_DOM0 && XENFS
> + default y if KALLSYMS
This needs documentation.
> --- /dev/null
> +++ b/drivers/xen/xenfs/xensyms.c
> @@ -0,0 +1,124 @@
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/seq_file.h>
> +#include <linux/fs.h>
> +#include <linux/mm.h>
> +#include <linux/proc_fs.h>
> +#include <linux/slab.h>
> +#include <xen/interface/platform.h>
> +#include <asm/xen/hypercall.h>
> +#include <xen/xen-ops.h>
> +#include "xenfs.h"
> +
> +
> +#define XEN_KSYM_NAME_LEN 127 /* Hypervisor may have different name length */
Shouldn't this be exported in the hypervisor headers then?
> +static int xensyms_release(struct inode *inode, struct file *file)
> +{
> + struct seq_file *m = file->private_data;
> + struct xen_platform_op *op = (struct xen_platform_op *)m->private;
> +
> + kfree(op->u.symdata.name);
Isn't op->u.symdata.name a guest handle? I think you need to extract
the pointer here?
David
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 1/6] xen: xensyms support
2014-06-10 13:31 ` David Vrabel
@ 2014-06-10 14:49 ` Boris Ostrovsky
2014-06-10 14:51 ` Jan Beulich
0 siblings, 1 reply; 30+ messages in thread
From: Boris Ostrovsky @ 2014-06-10 14:49 UTC (permalink / raw)
To: David Vrabel; +Cc: kevin.tian, xen-devel, dietmar.hahn, JBeulich
On 06/10/2014 09:31 AM, David Vrabel wrote:
>
>> --- /dev/null
>> +++ b/drivers/xen/xenfs/xensyms.c
>> @@ -0,0 +1,124 @@
>> +#include <linux/module.h>
>> +#include <linux/init.h>
>> +#include <linux/seq_file.h>
>> +#include <linux/fs.h>
>> +#include <linux/mm.h>
>> +#include <linux/proc_fs.h>
>> +#include <linux/slab.h>
>> +#include <xen/interface/platform.h>
>> +#include <asm/xen/hypercall.h>
>> +#include <xen/xen-ops.h>
>> +#include "xenfs.h"
>> +
>> +
>> +#define XEN_KSYM_NAME_LEN 127 /* Hypervisor may have different name length */
> Shouldn't this be exported in the hypervisor headers then?
Jan objected to having this as part of the interface so now we pass this
as a parameter to the hypervisor. I may return Xen's symbol length and,
if it is larger than XEN_KSYM_NAME_LEN, do a WARN_ONCE() (in the next
spin of the patch).
-boris
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 1/6] xen: xensyms support
2014-06-10 14:49 ` Boris Ostrovsky
@ 2014-06-10 14:51 ` Jan Beulich
2014-06-10 15:03 ` Boris Ostrovsky
0 siblings, 1 reply; 30+ messages in thread
From: Jan Beulich @ 2014-06-10 14:51 UTC (permalink / raw)
To: David Vrabel, Boris Ostrovsky; +Cc: kevin.tian, dietmar.hahn, xen-devel
>>> On 10.06.14 at 16:49, <boris.ostrovsky@oracle.com> wrote:
> On 06/10/2014 09:31 AM, David Vrabel wrote:
>>
>>> --- /dev/null
>>> +++ b/drivers/xen/xenfs/xensyms.c
>>> @@ -0,0 +1,124 @@
>>> +#include <linux/module.h>
>>> +#include <linux/init.h>
>>> +#include <linux/seq_file.h>
>>> +#include <linux/fs.h>
>>> +#include <linux/mm.h>
>>> +#include <linux/proc_fs.h>
>>> +#include <linux/slab.h>
>>> +#include <xen/interface/platform.h>
>>> +#include <asm/xen/hypercall.h>
>>> +#include <xen/xen-ops.h>
>>> +#include "xenfs.h"
>>> +
>>> +
>>> +#define XEN_KSYM_NAME_LEN 127 /* Hypervisor may have different name length
> */
>> Shouldn't this be exported in the hypervisor headers then?
>
>
> Jan objected to having this as part of the interface so now we pass this
> as a parameter to the hypervisor. I may return Xen's symbol length and,
> if it is larger than XEN_KSYM_NAME_LEN, do a WARN_ONCE() (in the next
> spin of the patch).
The result of my objection should be you not hardcoding any number...
Jan
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 1/6] xen: xensyms support
2014-06-10 14:51 ` Jan Beulich
@ 2014-06-10 15:03 ` Boris Ostrovsky
2014-06-10 15:21 ` Jan Beulich
0 siblings, 1 reply; 30+ messages in thread
From: Boris Ostrovsky @ 2014-06-10 15:03 UTC (permalink / raw)
To: Jan Beulich; +Cc: kevin.tian, David Vrabel, dietmar.hahn, xen-devel
On 06/10/2014 10:51 AM, Jan Beulich wrote:
>>>> On 10.06.14 at 16:49, <boris.ostrovsky@oracle.com> wrote:
>> On 06/10/2014 09:31 AM, David Vrabel wrote:
>>>> --- /dev/null
>>>> +++ b/drivers/xen/xenfs/xensyms.c
>>>> @@ -0,0 +1,124 @@
>>>> +#include <linux/module.h>
>>>> +#include <linux/init.h>
>>>> +#include <linux/seq_file.h>
>>>> +#include <linux/fs.h>
>>>> +#include <linux/mm.h>
>>>> +#include <linux/proc_fs.h>
>>>> +#include <linux/slab.h>
>>>> +#include <xen/interface/platform.h>
>>>> +#include <asm/xen/hypercall.h>
>>>> +#include <xen/xen-ops.h>
>>>> +#include "xenfs.h"
>>>> +
>>>> +
>>>> +#define XEN_KSYM_NAME_LEN 127 /* Hypervisor may have different name length
>> */
>>> Shouldn't this be exported in the hypervisor headers then?
>>
>> Jan objected to having this as part of the interface so now we pass this
>> as a parameter to the hypervisor. I may return Xen's symbol length and,
>> if it is larger than XEN_KSYM_NAME_LEN, do a WARN_ONCE() (in the next
>> spin of the patch).
> The result of my objection should be you not hardcoding any number...
I guess I can query hypervisor's symbol size by passing to
XENPF_get_symbol current symbol number as -1 (or some other token).
-boris
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 1/6] xen: xensyms support
2014-06-10 15:03 ` Boris Ostrovsky
@ 2014-06-10 15:21 ` Jan Beulich
2014-06-10 15:45 ` Boris Ostrovsky
0 siblings, 1 reply; 30+ messages in thread
From: Jan Beulich @ 2014-06-10 15:21 UTC (permalink / raw)
To: Boris Ostrovsky; +Cc: kevin.tian, David Vrabel, dietmar.hahn, xen-devel
>>> On 10.06.14 at 17:03, <boris.ostrovsky@oracle.com> wrote:
> On 06/10/2014 10:51 AM, Jan Beulich wrote:
>>>>> On 10.06.14 at 16:49, <boris.ostrovsky@oracle.com> wrote:
>>> On 06/10/2014 09:31 AM, David Vrabel wrote:
>>>>> --- /dev/null
>>>>> +++ b/drivers/xen/xenfs/xensyms.c
>>>>> @@ -0,0 +1,124 @@
>>>>> +#include <linux/module.h>
>>>>> +#include <linux/init.h>
>>>>> +#include <linux/seq_file.h>
>>>>> +#include <linux/fs.h>
>>>>> +#include <linux/mm.h>
>>>>> +#include <linux/proc_fs.h>
>>>>> +#include <linux/slab.h>
>>>>> +#include <xen/interface/platform.h>
>>>>> +#include <asm/xen/hypercall.h>
>>>>> +#include <xen/xen-ops.h>
>>>>> +#include "xenfs.h"
>>>>> +
>>>>> +
>>>>> +#define XEN_KSYM_NAME_LEN 127 /* Hypervisor may have different name length
>>> */
>>>> Shouldn't this be exported in the hypervisor headers then?
>>>
>>> Jan objected to having this as part of the interface so now we pass this
>>> as a parameter to the hypervisor. I may return Xen's symbol length and,
>>> if it is larger than XEN_KSYM_NAME_LEN, do a WARN_ONCE() (in the next
>>> spin of the patch).
>> The result of my objection should be you not hardcoding any number...
>
> I guess I can query hypervisor's symbol size by passing to
> XENPF_get_symbol current symbol number as -1 (or some other token).
No - just don't assume any particular length.
Jan
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 1/6] xen: xensyms support
2014-06-10 15:21 ` Jan Beulich
@ 2014-06-10 15:45 ` Boris Ostrovsky
2014-06-10 16:13 ` Jan Beulich
0 siblings, 1 reply; 30+ messages in thread
From: Boris Ostrovsky @ 2014-06-10 15:45 UTC (permalink / raw)
To: Jan Beulich; +Cc: kevin.tian, David Vrabel, dietmar.hahn, xen-devel
On 06/10/2014 11:21 AM, Jan Beulich wrote:
>>>> On 10.06.14 at 17:03, <boris.ostrovsky@oracle.com> wrote:
>> On 06/10/2014 10:51 AM, Jan Beulich wrote:
>>>>>> On 10.06.14 at 16:49, <boris.ostrovsky@oracle.com> wrote:
>>>> On 06/10/2014 09:31 AM, David Vrabel wrote:
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/xen/xenfs/xensyms.c
>>>>>> @@ -0,0 +1,124 @@
>>>>>> +#include <linux/module.h>
>>>>>> +#include <linux/init.h>
>>>>>> +#include <linux/seq_file.h>
>>>>>> +#include <linux/fs.h>
>>>>>> +#include <linux/mm.h>
>>>>>> +#include <linux/proc_fs.h>
>>>>>> +#include <linux/slab.h>
>>>>>> +#include <xen/interface/platform.h>
>>>>>> +#include <asm/xen/hypercall.h>
>>>>>> +#include <xen/xen-ops.h>
>>>>>> +#include "xenfs.h"
>>>>>> +
>>>>>> +
>>>>>> +#define XEN_KSYM_NAME_LEN 127 /* Hypervisor may have different name length
>>>> */
>>>>> Shouldn't this be exported in the hypervisor headers then?
>>>> Jan objected to having this as part of the interface so now we pass this
>>>> as a parameter to the hypervisor. I may return Xen's symbol length and,
>>>> if it is larger than XEN_KSYM_NAME_LEN, do a WARN_ONCE() (in the next
>>>> spin of the patch).
>>> The result of my objection should be you not hardcoding any number...
>> I guess I can query hypervisor's symbol size by passing to
>> XENPF_get_symbol current symbol number as -1 (or some other token).
> No - just don't assume any particular length.
I don't follow. I need to start with something and if I don't query
hypervisor --- how would I get going?
-boris
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 1/6] xen: xensyms support
2014-06-10 15:45 ` Boris Ostrovsky
@ 2014-06-10 16:13 ` Jan Beulich
0 siblings, 0 replies; 30+ messages in thread
From: Jan Beulich @ 2014-06-10 16:13 UTC (permalink / raw)
To: Boris Ostrovsky; +Cc: kevin.tian, David Vrabel, dietmar.hahn, xen-devel
>>> On 10.06.14 at 17:45, <boris.ostrovsky@oracle.com> wrote:
> On 06/10/2014 11:21 AM, Jan Beulich wrote:
>>>>> On 10.06.14 at 17:03, <boris.ostrovsky@oracle.com> wrote:
>>> On 06/10/2014 10:51 AM, Jan Beulich wrote:
>>>>>>> On 10.06.14 at 16:49, <boris.ostrovsky@oracle.com> wrote:
>>>>> On 06/10/2014 09:31 AM, David Vrabel wrote:
>>>>>>> --- /dev/null
>>>>>>> +++ b/drivers/xen/xenfs/xensyms.c
>>>>>>> @@ -0,0 +1,124 @@
>>>>>>> +#include <linux/module.h>
>>>>>>> +#include <linux/init.h>
>>>>>>> +#include <linux/seq_file.h>
>>>>>>> +#include <linux/fs.h>
>>>>>>> +#include <linux/mm.h>
>>>>>>> +#include <linux/proc_fs.h>
>>>>>>> +#include <linux/slab.h>
>>>>>>> +#include <xen/interface/platform.h>
>>>>>>> +#include <asm/xen/hypercall.h>
>>>>>>> +#include <xen/xen-ops.h>
>>>>>>> +#include "xenfs.h"
>>>>>>> +
>>>>>>> +
>>>>>>> +#define XEN_KSYM_NAME_LEN 127 /* Hypervisor may have different name length
>>>>> */
>>>>>> Shouldn't this be exported in the hypervisor headers then?
>>>>> Jan objected to having this as part of the interface so now we pass this
>>>>> as a parameter to the hypervisor. I may return Xen's symbol length and,
>>>>> if it is larger than XEN_KSYM_NAME_LEN, do a WARN_ONCE() (in the next
>>>>> spin of the patch).
>>>> The result of my objection should be you not hardcoding any number...
>>> I guess I can query hypervisor's symbol size by passing to
>>> XENPF_get_symbol current symbol number as -1 (or some other token).
>> No - just don't assume any particular length.
>
> I don't follow. I need to start with something and if I don't query
> hypervisor --- how would I get going?
Just use a reasonable number as a start, and e.g. double the buffer
each time you get indication of a longer symbol.
Jan
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 1/6] xen: xensyms support
2014-06-06 17:44 ` [PATCH v2 1/6] xen: xensyms support Boris Ostrovsky
2014-06-10 13:31 ` David Vrabel
@ 2014-06-11 9:37 ` Dietmar Hahn
1 sibling, 0 replies; 30+ messages in thread
From: Dietmar Hahn @ 2014-06-11 9:37 UTC (permalink / raw)
To: xen-devel; +Cc: kevin.tian, Boris Ostrovsky, david.vrabel, JBeulich
Am Freitag 06 Juni 2014, 13:44:41 schrieb Boris Ostrovsky:
> Export Xen symbols to dom0 via /proc/xen/xensyms (similar to /proc/kallsyms).
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> ---
> drivers/xen/Kconfig | 5 ++
> drivers/xen/xenfs/Makefile | 1 +
> drivers/xen/xenfs/super.c | 3 +
> drivers/xen/xenfs/xenfs.h | 1 +
> drivers/xen/xenfs/xensyms.c | 124 +++++++++++++++++++++++++++++++++++++++
> include/xen/interface/platform.h | 19 ++++++
> 6 files changed, 153 insertions(+)
> create mode 100644 drivers/xen/xenfs/xensyms.c
>
> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> index 38fb36e..b3d1da7 100644
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -240,4 +240,9 @@ config XEN_MCE_LOG
> config XEN_HAVE_PVMMU
> bool
>
> +config XEN_SYMS
> + bool "Xen symbols"
> + depends on XEN_DOM0 && XENFS
> + default y if KALLSYMS
> +
> endmenu
> diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
> index b019865..1a83010 100644
> --- a/drivers/xen/xenfs/Makefile
> +++ b/drivers/xen/xenfs/Makefile
> @@ -2,3 +2,4 @@ obj-$(CONFIG_XENFS) += xenfs.o
>
> xenfs-y = super.o
> xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
> +xenfs-$(CONFIG_XEN_SYMS) += xensyms.o
> diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
> index 06092e0..8559a71 100644
> --- a/drivers/xen/xenfs/super.c
> +++ b/drivers/xen/xenfs/super.c
> @@ -57,6 +57,9 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
> { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
> { "xsd_kva", &xsd_kva_file_ops, S_IRUSR|S_IWUSR},
> { "xsd_port", &xsd_port_file_ops, S_IRUSR|S_IWUSR},
> +#ifdef CONFIG_XEN_SYMS
> + { "xensyms", &xensyms_ops, S_IRUSR},
> +#endif
> {""},
> };
>
> diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
> index 6b80c77..2c5934e 100644
> --- a/drivers/xen/xenfs/xenfs.h
> +++ b/drivers/xen/xenfs/xenfs.h
> @@ -3,5 +3,6 @@
>
> extern const struct file_operations xsd_kva_file_ops;
> extern const struct file_operations xsd_port_file_ops;
> +extern const struct file_operations xensyms_ops;
>
> #endif /* _XENFS_XENBUS_H */
> diff --git a/drivers/xen/xenfs/xensyms.c b/drivers/xen/xenfs/xensyms.c
> new file mode 100644
> index 0000000..748948c
> --- /dev/null
> +++ b/drivers/xen/xenfs/xensyms.c
> @@ -0,0 +1,124 @@
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/seq_file.h>
> +#include <linux/fs.h>
> +#include <linux/mm.h>
> +#include <linux/proc_fs.h>
> +#include <linux/slab.h>
> +#include <xen/interface/platform.h>
> +#include <asm/xen/hypercall.h>
> +#include <xen/xen-ops.h>
> +#include "xenfs.h"
> +
> +
> +#define XEN_KSYM_NAME_LEN 127 /* Hypervisor may have different name length */
> +
> +
> +/* Grab next output page from the hypervisor */
> +static int xensyms_next_sym(struct xen_platform_op *op)
> +{
> + int ret;
> + uint64_t symnum = op->u.symdata.symnum;
> +
> + memset(op->u.symdata.name, 0, XEN_KSYM_NAME_LEN + 1);
> +
> + ret = HYPERVISOR_dom0_op(op);
> + if (ret < 0)
> + return ret;
> +
> + if (op->u.symdata.symnum == symnum)
> + /* End of symbols */
> + return 1;
> +
> + return 0;
> +}
> +
> +static void *xensyms_start(struct seq_file *m, loff_t *pos)
> +{
> + struct xen_platform_op *op = (struct xen_platform_op *)m->private;
> +
> + if (op->u.symdata.symnum != *pos)
> + op->u.symdata.symnum = *pos;
> +
> + if (xensyms_next_sym(op))
> + return NULL;
> +
> + return m->private;
> +}
> +
> +static void *xensyms_next(struct seq_file *m, void *p, loff_t *pos)
> +{
> + struct xen_platform_op *op = (struct xen_platform_op *)m->private;
> +
> + (*pos)++;
> +
> + if (op->u.symdata.symnum != *pos)
> + op->u.symdata.symnum = *pos;
> +
> + if (xensyms_next_sym(op))
> + return NULL;
> +
> + return p;
> +}
> +
> +static int xensyms_show(struct seq_file *m, void *p)
> +{
> + struct xen_platform_op *op = (struct xen_platform_op *)m->private;
> +
> + seq_printf(m, "%016llx %c %s\n", op->u.symdata.address,
> + op->u.symdata.type, op->u.symdata.name);
> +
> + return 0;
> +}
> +
> +static void xensyms_stop(struct seq_file *m, void *p)
> +{
> +}
> +
> +static const struct seq_operations xensyms_seq_ops = {
> + .start = xensyms_start,
> + .next = xensyms_next,
> + .show = xensyms_show,
> + .stop = xensyms_stop,
> +};
> +
> +static int xensyms_open(struct inode *inode, struct file *file)
> +{
> + struct seq_file *m;
> + struct xen_platform_op *op;
> + char *buf;
> + int ret;
> +
> + buf = kzalloc(XEN_KSYM_NAME_LEN + 1, GFP_KERNEL);
> + if (!buf)
> + return -ENOMEM;
> +
> + ret = seq_open_private(file, &xensyms_seq_ops,
> + sizeof(struct xen_platform_op));
> + if (ret)
> + return ret;
kfree(buf) in error case?
Dietmar.
> + m = file->private_data;
> + op = (struct xen_platform_op *)m->private;
> + set_xen_guest_handle(op->u.symdata.name, buf);
> + op->cmd = XENPF_get_symbol;
> + op->u.symdata.namelen = XEN_KSYM_NAME_LEN + 1;
> +
> + return 0;
> +}
> +
> +static int xensyms_release(struct inode *inode, struct file *file)
> +{
> + struct seq_file *m = file->private_data;
> + struct xen_platform_op *op = (struct xen_platform_op *)m->private;
> +
> + kfree(op->u.symdata.name);
> + return seq_release_private(inode, file);
> +}
> +
> +const struct file_operations xensyms_ops = {
> + .open = xensyms_open,
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = xensyms_release
> +};
> diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
> index f1331e3..792e29a 100644
> --- a/include/xen/interface/platform.h
> +++ b/include/xen/interface/platform.h
> @@ -352,6 +352,24 @@ struct xenpf_core_parking {
> };
> DEFINE_GUEST_HANDLE_STRUCT(xenpf_core_parking);
>
> +#define XENPF_get_symbol 61
> +struct xenpf_symdata {
> + /* IN/OUT variables */
> + uint32_t namelen; /* size of 'name' buffer */
> +
> + /* IN/OUT variables */
> + uint32_t symnum; /* IN: Symbol to read */
> + /* OUT: Next available symbol. If same as IN then */
> + /* we reached the end */
> +
> + /* OUT variables */
> + char type;
> + uint64_t address;
> + GUEST_HANDLE(char) name;
> +};
> +DEFINE_GUEST_HANDLE_STRUCT(xenpf_symdata);
> +
> +
> struct xen_platform_op {
> uint32_t cmd;
> uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
> @@ -372,6 +390,7 @@ struct xen_platform_op {
> struct xenpf_cpu_hotadd cpu_add;
> struct xenpf_mem_hotadd mem_add;
> struct xenpf_core_parking core_parking;
> + struct xenpf_symdata symdata;
> uint8_t pad[128];
> } u;
> };
>
--
Company details: http://ts.fujitsu.com/imprint.html
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH v2 2/6] xen/PMU: Sysfs interface for setting Xen PMU mode
2014-06-06 17:44 [PATCH v2 0/6] xen/PMU: PMU support for Xen PV guests Boris Ostrovsky
2014-06-06 17:44 ` [PATCH v2 1/6] xen: xensyms support Boris Ostrovsky
@ 2014-06-06 17:44 ` Boris Ostrovsky
2014-06-06 20:19 ` Konrad Rzeszutek Wilk
` (2 more replies)
2014-06-06 17:44 ` [PATCH v2 3/6] xen/PMU: Initialization code for Xen PMU Boris Ostrovsky
` (4 subsequent siblings)
6 siblings, 3 replies; 30+ messages in thread
From: Boris Ostrovsky @ 2014-06-06 17:44 UTC (permalink / raw)
To: david.vrabel, konrad.wilk
Cc: boris.ostrovsky, kevin.tian, dietmar.hahn, JBeulich, xen-devel
Set Xen's PMU mode via /sys/hypervisor/pmu/pmu_mode. Add XENPMU hypercall.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
arch/x86/include/asm/xen/hypercall.h | 6 ++
arch/x86/xen/xen-head.S | 5 +-
drivers/xen/sys-hypervisor.c | 119 +++++++++++++++++++++++++++++++++++
include/xen/interface/xen.h | 1 +
include/xen/interface/xenpmu.h | 49 +++++++++++++++
5 files changed, 179 insertions(+), 1 deletion(-)
create mode 100644 include/xen/interface/xenpmu.h
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index e709884..f022cef 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -465,6 +465,12 @@ HYPERVISOR_tmem_op(
return _hypercall1(int, tmem_op, op);
}
+static inline int
+HYPERVISOR_xenpmu_op(unsigned int op, void *arg)
+{
+ return _hypercall2(int, xenpmu_op, op, arg);
+}
+
static inline void
MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
{
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 485b695..628852f 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -93,8 +93,11 @@ NEXT_HYPERCALL(sysctl)
NEXT_HYPERCALL(domctl)
NEXT_HYPERCALL(kexec_op)
NEXT_HYPERCALL(tmem_op) /* 38 */
+ENTRY(xenclient_rsvd)
+ .skip 32
+NEXT_HYPERCALL(xenpmu_op) /* 40 */
ENTRY(xen_hypercall_rsvr)
- .skip 320
+ .skip 256
NEXT_HYPERCALL(mca) /* 48 */
NEXT_HYPERCALL(arch_1)
NEXT_HYPERCALL(arch_2)
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
index 96453f8..b37ae2d 100644
--- a/drivers/xen/sys-hypervisor.c
+++ b/drivers/xen/sys-hypervisor.c
@@ -20,6 +20,7 @@
#include <xen/xenbus.h>
#include <xen/interface/xen.h>
#include <xen/interface/version.h>
+#include <xen/interface/xenpmu.h>
#define HYPERVISOR_ATTR_RO(_name) \
static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name)
@@ -368,6 +369,116 @@ static void xen_properties_destroy(void)
sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
}
+struct pmu_mode {
+ const char *name;
+ uint32_t mode;
+};
+
+struct pmu_mode pmu_modes[] = {
+ {"enable", XENPMU_MODE_ON},
+ {"priv_enable", XENPMU_MODE_PRIV},
+ {"disable", 0}
+};
+
+static ssize_t pmu_mode_store(struct hyp_sysfs_attr *attr,
+ const char *buffer, size_t len)
+{
+ int ret;
+ struct xen_pmu_params xp;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(pmu_modes); i++) {
+ if (!strncmp(buffer, pmu_modes[i].name,
+ strlen(pmu_modes[i].name))) {
+ xp.val = pmu_modes[i].mode;
+ break;
+ }
+ }
+
+ if (i == ARRAY_SIZE(pmu_modes))
+ return -EINVAL;
+
+ ret = HYPERVISOR_xenpmu_op(XENPMU_mode_set, &xp);
+ if (ret)
+ return ret;
+
+ return len;
+}
+
+static ssize_t pmu_mode_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+ int ret;
+ struct xen_pmu_params xp;
+ int i;
+ uint32_t mode;
+
+ ret = HYPERVISOR_xenpmu_op(XENPMU_mode_get, &xp);
+ if (ret)
+ return ret;
+
+ mode = (uint32_t)xp.val;
+ for (i = 0; i < ARRAY_SIZE(pmu_modes); i++) {
+ if (mode == pmu_modes[i].mode)
+ return sprintf(buffer, "%s\n", pmu_modes[i].name);
+ }
+
+ return -EINVAL;
+}
+HYPERVISOR_ATTR_RW(pmu_mode);
+
+static ssize_t pmu_features_store(struct hyp_sysfs_attr *attr,
+ const char *buffer, size_t len)
+{
+ int ret;
+ uint32_t features;
+ struct xen_pmu_params xp;
+
+ ret = kstrtou32(buffer, 0, &features);
+ if (ret)
+ return ret;
+
+ xp.val = features;
+ ret = HYPERVISOR_xenpmu_op(XENPMU_feature_set, &xp);
+ if (ret)
+ return ret;
+
+ return len;
+}
+
+static ssize_t pmu_features_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+ int ret;
+ struct xen_pmu_params xp;
+
+ ret = HYPERVISOR_xenpmu_op(XENPMU_feature_get, &xp);
+ if (ret)
+ return ret;
+
+ return sprintf(buffer, "0x%x\n", (uint32_t)xp.val);
+}
+HYPERVISOR_ATTR_RW(pmu_features);
+
+static struct attribute *xen_pmu_attrs[] = {
+ &pmu_mode_attr.attr,
+ &pmu_features_attr.attr,
+ NULL
+};
+
+static const struct attribute_group xen_pmu_group = {
+ .name = "pmu",
+ .attrs = xen_pmu_attrs,
+};
+
+static int __init xen_pmu_init(void)
+{
+ return sysfs_create_group(hypervisor_kobj, &xen_pmu_group);
+}
+
+static void xen_pmu_destroy(void)
+{
+ sysfs_remove_group(hypervisor_kobj, &xen_pmu_group);
+}
+
static int __init hyper_sysfs_init(void)
{
int ret;
@@ -390,9 +501,16 @@ static int __init hyper_sysfs_init(void)
ret = xen_properties_init();
if (ret)
goto prop_out;
+ if (xen_initial_domain()) {
+ ret = xen_pmu_init();
+ if (ret)
+ goto pmu_out;
+ }
goto out;
+pmu_out:
+ xen_properties_destroy();
prop_out:
xen_sysfs_uuid_destroy();
uuid_out:
@@ -407,6 +525,7 @@ out:
static void __exit hyper_sysfs_exit(void)
{
+ xen_pmu_destroy();
xen_properties_destroy();
xen_compilation_destroy();
xen_sysfs_uuid_destroy();
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 0cd5ca3..d071e4a 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -58,6 +58,7 @@
#define __HYPERVISOR_physdev_op 33
#define __HYPERVISOR_hvm_op 34
#define __HYPERVISOR_tmem_op 38
+#define __HYPERVISOR_xenpmu_op 40
/* Architecture-specific hypercall definitions. */
#define __HYPERVISOR_arch_0 48
diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h
new file mode 100644
index 0000000..0d15d3a
--- /dev/null
+++ b/include/xen/interface/xenpmu.h
@@ -0,0 +1,49 @@
+#ifndef __XEN_PUBLIC_XENPMU_H__
+#define __XEN_PUBLIC_XENPMU_H__
+
+#include "xen.h"
+
+#define XENPMU_VER_MAJ 0
+#define XENPMU_VER_MIN 1
+
+/* HYPERVISOR_xenpmu_op commands */
+#define XENPMU_mode_get 0
+#define XENPMU_mode_set 1
+#define XENPMU_feature_get 2
+#define XENPMU_feature_set 3
+
+/* Parameter structure for HYPERVISOR_xenpmu_op call */
+struct xen_pmu_params {
+ union {
+ struct version {
+ uint8_t maj;
+ uint8_t min;
+ } version;
+ uint64_t pad;
+ };
+ union {
+ uint64_t val;
+ GUEST_HANDLE(void) valp;
+ };
+};
+
+/* PMU modes:
+ * - XENPMU_MODE_OFF: No PMU virtualization
+ * - XENPMU_MODE_ON: Guests can profile themselves, dom0 profiles
+ * itself and Xen
+ * - XENPMU_MODE_PRIV: Only dom0 has access to VPMU and it profiles
+ * everyone: itself, the hypervisor and the guests.
+ */
+#define XENPMU_MODE_OFF 0
+#define XENPMU_MODE_ON (1<<0)
+#define XENPMU_MODE_PRIV (1<<1)
+
+
+/*
+ * PMU features:
+ * - XENPMU_FEATURE_INTEL_BTS: Intel BTS support (ignored on AMD)
+ */
+#define XENPMU_FEATURE_INTEL_BTS 1
+
+
+#endif /* __XEN_PUBLIC_XENPMU_H__ */
--
1.8.1.4
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH v2 2/6] xen/PMU: Sysfs interface for setting Xen PMU mode
2014-06-06 17:44 ` [PATCH v2 2/6] xen/PMU: Sysfs interface for setting Xen PMU mode Boris Ostrovsky
@ 2014-06-06 20:19 ` Konrad Rzeszutek Wilk
2014-06-10 13:33 ` David Vrabel
2014-06-10 13:48 ` David Vrabel
2014-06-11 10:13 ` Dietmar Hahn
2 siblings, 1 reply; 30+ messages in thread
From: Konrad Rzeszutek Wilk @ 2014-06-06 20:19 UTC (permalink / raw)
To: Boris Ostrovsky
Cc: kevin.tian, dietmar.hahn, david.vrabel, JBeulich, xen-devel
On Fri, Jun 06, 2014 at 01:44:42PM -0400, Boris Ostrovsky wrote:
> Set Xen's PMU mode via /sys/hypervisor/pmu/pmu_mode. Add XENPMU hypercall.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> ---
> arch/x86/include/asm/xen/hypercall.h | 6 ++
> arch/x86/xen/xen-head.S | 5 +-
> drivers/xen/sys-hypervisor.c | 119 +++++++++++++++++++++++++++++++++++
You probably need a Documentation patch as well.
..and some rather minor changes:
> include/xen/interface/xen.h | 1 +
> include/xen/interface/xenpmu.h | 49 +++++++++++++++
> 5 files changed, 179 insertions(+), 1 deletion(-)
> create mode 100644 include/xen/interface/xenpmu.h
>
> diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
> index e709884..f022cef 100644
> --- a/arch/x86/include/asm/xen/hypercall.h
> +++ b/arch/x86/include/asm/xen/hypercall.h
> @@ -465,6 +465,12 @@ HYPERVISOR_tmem_op(
> return _hypercall1(int, tmem_op, op);
> }
>
> +static inline int
> +HYPERVISOR_xenpmu_op(unsigned int op, void *arg)
> +{
> + return _hypercall2(int, xenpmu_op, op, arg);
> +}
> +
> static inline void
> MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
> {
> diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
> index 485b695..628852f 100644
> --- a/arch/x86/xen/xen-head.S
> +++ b/arch/x86/xen/xen-head.S
> @@ -93,8 +93,11 @@ NEXT_HYPERCALL(sysctl)
> NEXT_HYPERCALL(domctl)
> NEXT_HYPERCALL(kexec_op)
> NEXT_HYPERCALL(tmem_op) /* 38 */
> +ENTRY(xenclient_rsvd)
> + .skip 32
> +NEXT_HYPERCALL(xenpmu_op) /* 40 */
> ENTRY(xen_hypercall_rsvr)
> - .skip 320
> + .skip 256
> NEXT_HYPERCALL(mca) /* 48 */
> NEXT_HYPERCALL(arch_1)
> NEXT_HYPERCALL(arch_2)
> diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
> index 96453f8..b37ae2d 100644
> --- a/drivers/xen/sys-hypervisor.c
> +++ b/drivers/xen/sys-hypervisor.c
> @@ -20,6 +20,7 @@
> #include <xen/xenbus.h>
> #include <xen/interface/xen.h>
> #include <xen/interface/version.h>
> +#include <xen/interface/xenpmu.h>
>
> #define HYPERVISOR_ATTR_RO(_name) \
> static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name)
> @@ -368,6 +369,116 @@ static void xen_properties_destroy(void)
> sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
> }
>
> +struct pmu_mode {
> + const char *name;
> + uint32_t mode;
> +};
> +
> +struct pmu_mode pmu_modes[] = {
> + {"enable", XENPMU_MODE_ON},
> + {"priv_enable", XENPMU_MODE_PRIV},
> + {"disable", 0}
> +};
> +
> +static ssize_t pmu_mode_store(struct hyp_sysfs_attr *attr,
> + const char *buffer, size_t len)
> +{
> + int ret;
> + struct xen_pmu_params xp;
> + int i;
'i' can be on the same line as 'ret'
> +
> + for (i = 0; i < ARRAY_SIZE(pmu_modes); i++) {
> + if (!strncmp(buffer, pmu_modes[i].name,
> + strlen(pmu_modes[i].name))) {
> + xp.val = pmu_modes[i].mode;
> + break;
> + }
> + }
> +
> + if (i == ARRAY_SIZE(pmu_modes))
> + return -EINVAL;
> +
> + ret = HYPERVISOR_xenpmu_op(XENPMU_mode_set, &xp);
> + if (ret)
> + return ret;
> +
> + return len;
> +}
> +
> +static ssize_t pmu_mode_show(struct hyp_sysfs_attr *attr, char *buffer)
> +{
> + int ret;
> + struct xen_pmu_params xp;
> + int i;
'i' can be on the same line as 'ret'
> + uint32_t mode;
> +
> + ret = HYPERVISOR_xenpmu_op(XENPMU_mode_get, &xp);
> + if (ret)
> + return ret;
> +
> + mode = (uint32_t)xp.val;
> + for (i = 0; i < ARRAY_SIZE(pmu_modes); i++) {
> + if (mode == pmu_modes[i].mode)
> + return sprintf(buffer, "%s\n", pmu_modes[i].name);
> + }
> +
> + return -EINVAL;
> +}
> +HYPERVISOR_ATTR_RW(pmu_mode);
> +
> +static ssize_t pmu_features_store(struct hyp_sysfs_attr *attr,
> + const char *buffer, size_t len)
> +{
> + int ret;
> + uint32_t features;
> + struct xen_pmu_params xp;
> +
> + ret = kstrtou32(buffer, 0, &features);
> + if (ret)
> + return ret;
> +
> + xp.val = features;
> + ret = HYPERVISOR_xenpmu_op(XENPMU_feature_set, &xp);
> + if (ret)
> + return ret;
> +
> + return len;
> +}
> +
> +static ssize_t pmu_features_show(struct hyp_sysfs_attr *attr, char *buffer)
> +{
> + int ret;
> + struct xen_pmu_params xp;
> +
> + ret = HYPERVISOR_xenpmu_op(XENPMU_feature_get, &xp);
> + if (ret)
> + return ret;
> +
> + return sprintf(buffer, "0x%x\n", (uint32_t)xp.val);
> +}
> +HYPERVISOR_ATTR_RW(pmu_features);
> +
> +static struct attribute *xen_pmu_attrs[] = {
> + &pmu_mode_attr.attr,
> + &pmu_features_attr.attr,
> + NULL
> +};
> +
> +static const struct attribute_group xen_pmu_group = {
> + .name = "pmu",
> + .attrs = xen_pmu_attrs,
> +};
> +
> +static int __init xen_pmu_init(void)
> +{
> + return sysfs_create_group(hypervisor_kobj, &xen_pmu_group);
> +}
> +
> +static void xen_pmu_destroy(void)
> +{
> + sysfs_remove_group(hypervisor_kobj, &xen_pmu_group);
> +}
> +
> static int __init hyper_sysfs_init(void)
> {
> int ret;
> @@ -390,9 +501,16 @@ static int __init hyper_sysfs_init(void)
> ret = xen_properties_init();
> if (ret)
> goto prop_out;
> + if (xen_initial_domain()) {
> + ret = xen_pmu_init();
> + if (ret)
> + goto pmu_out;
> + }
>
> goto out;
>
> +pmu_out:
> + xen_properties_destroy();
> prop_out:
> xen_sysfs_uuid_destroy();
> uuid_out:
> @@ -407,6 +525,7 @@ out:
>
> static void __exit hyper_sysfs_exit(void)
> {
> + xen_pmu_destroy();
> xen_properties_destroy();
> xen_compilation_destroy();
> xen_sysfs_uuid_destroy();
> diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
> index 0cd5ca3..d071e4a 100644
> --- a/include/xen/interface/xen.h
> +++ b/include/xen/interface/xen.h
> @@ -58,6 +58,7 @@
> #define __HYPERVISOR_physdev_op 33
> #define __HYPERVISOR_hvm_op 34
> #define __HYPERVISOR_tmem_op 38
> +#define __HYPERVISOR_xenpmu_op 40
>
> /* Architecture-specific hypercall definitions. */
> #define __HYPERVISOR_arch_0 48
> diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h
> new file mode 100644
> index 0000000..0d15d3a
> --- /dev/null
> +++ b/include/xen/interface/xenpmu.h
> @@ -0,0 +1,49 @@
> +#ifndef __XEN_PUBLIC_XENPMU_H__
> +#define __XEN_PUBLIC_XENPMU_H__
> +
> +#include "xen.h"
> +
> +#define XENPMU_VER_MAJ 0
> +#define XENPMU_VER_MIN 1
> +
> +/* HYPERVISOR_xenpmu_op commands */
> +#define XENPMU_mode_get 0
> +#define XENPMU_mode_set 1
> +#define XENPMU_feature_get 2
> +#define XENPMU_feature_set 3
> +
> +/* Parameter structure for HYPERVISOR_xenpmu_op call */
> +struct xen_pmu_params {
> + union {
> + struct version {
> + uint8_t maj;
> + uint8_t min;
> + } version;
> + uint64_t pad;
> + };
> + union {
> + uint64_t val;
> + GUEST_HANDLE(void) valp;
> + };
> +};
> +
> +/* PMU modes:
> + * - XENPMU_MODE_OFF: No PMU virtualization
> + * - XENPMU_MODE_ON: Guests can profile themselves, dom0 profiles
> + * itself and Xen
> + * - XENPMU_MODE_PRIV: Only dom0 has access to VPMU and it profiles
> + * everyone: itself, the hypervisor and the guests.
> + */
> +#define XENPMU_MODE_OFF 0
> +#define XENPMU_MODE_ON (1<<0)
> +#define XENPMU_MODE_PRIV (1<<1)
> +
> +
> +/*
> + * PMU features:
> + * - XENPMU_FEATURE_INTEL_BTS: Intel BTS support (ignored on AMD)
> + */
> +#define XENPMU_FEATURE_INTEL_BTS 1
> +
> +
> +#endif /* __XEN_PUBLIC_XENPMU_H__ */
> --
> 1.8.1.4
>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 2/6] xen/PMU: Sysfs interface for setting Xen PMU mode
2014-06-06 20:19 ` Konrad Rzeszutek Wilk
@ 2014-06-10 13:33 ` David Vrabel
0 siblings, 0 replies; 30+ messages in thread
From: David Vrabel @ 2014-06-10 13:33 UTC (permalink / raw)
To: Konrad Rzeszutek Wilk, Boris Ostrovsky
Cc: kevin.tian, dietmar.hahn, JBeulich, xen-devel
On 06/06/14 21:19, Konrad Rzeszutek Wilk wrote:
>
>> +static ssize_t pmu_mode_show(struct hyp_sysfs_attr *attr, char *buffer)
>> +{
>> + int ret;
>> + struct xen_pmu_params xp;
>> + int i;
>
> 'i' can be on the same line as 'ret'
No thanks. Only do this if the two variables are related.
David
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 2/6] xen/PMU: Sysfs interface for setting Xen PMU mode
2014-06-06 17:44 ` [PATCH v2 2/6] xen/PMU: Sysfs interface for setting Xen PMU mode Boris Ostrovsky
2014-06-06 20:19 ` Konrad Rzeszutek Wilk
@ 2014-06-10 13:48 ` David Vrabel
2014-06-10 14:52 ` Boris Ostrovsky
2014-06-11 10:13 ` Dietmar Hahn
2 siblings, 1 reply; 30+ messages in thread
From: David Vrabel @ 2014-06-10 13:48 UTC (permalink / raw)
To: Boris Ostrovsky, konrad.wilk
Cc: kevin.tian, dietmar.hahn, JBeulich, xen-devel
On 06/06/14 18:44, Boris Ostrovsky wrote:
> Set Xen's PMU mode via /sys/hypervisor/pmu/pmu_mode. Add XENPMU hypercall.
sysfs files need documentation in Documentaton/ABI/
> --- a/drivers/xen/sys-hypervisor.c
> +++ b/drivers/xen/sys-hypervisor.c
> @@ -20,6 +20,7 @@
> #include <xen/xenbus.h>
> #include <xen/interface/xen.h>
> #include <xen/interface/version.h>
> +#include <xen/interface/xenpmu.h>
>
> #define HYPERVISOR_ATTR_RO(_name) \
> static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name)
> @@ -368,6 +369,116 @@ static void xen_properties_destroy(void)
> sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
> }
>
> +struct pmu_mode {
> + const char *name;
> + uint32_t mode;
> +};
> +
> +struct pmu_mode pmu_modes[] = {
> + {"enable", XENPMU_MODE_ON},
> + {"priv_enable", XENPMU_MODE_PRIV},
> + {"disable", 0}
> +};
If there a better, more description set of options here? How about:
"self", "all", "none"?
> +
> +static ssize_t pmu_mode_store(struct hyp_sysfs_attr *attr,
> + const char *buffer, size_t len)
> +{
> + int ret;
> + struct xen_pmu_params xp;
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(pmu_modes); i++) {
> + if (!strncmp(buffer, pmu_modes[i].name,
> + strlen(pmu_modes[i].name))) {
I prefer if (strncmp(...) == 0) as I find the use of the not operator
for a positive match confusing.
David
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 2/6] xen/PMU: Sysfs interface for setting Xen PMU mode
2014-06-10 13:48 ` David Vrabel
@ 2014-06-10 14:52 ` Boris Ostrovsky
0 siblings, 0 replies; 30+ messages in thread
From: Boris Ostrovsky @ 2014-06-10 14:52 UTC (permalink / raw)
To: David Vrabel; +Cc: kevin.tian, xen-devel, dietmar.hahn, JBeulich
On 06/10/2014 09:48 AM, David Vrabel wrote:
>
>> --- a/drivers/xen/sys-hypervisor.c
>> +++ b/drivers/xen/sys-hypervisor.c
>> @@ -20,6 +20,7 @@
>> #include <xen/xenbus.h>
>> #include <xen/interface/xen.h>
>> #include <xen/interface/version.h>
>> +#include <xen/interface/xenpmu.h>
>>
>> #define HYPERVISOR_ATTR_RO(_name) \
>> static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name)
>> @@ -368,6 +369,116 @@ static void xen_properties_destroy(void)
>> sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
>> }
>>
>> +struct pmu_mode {
>> + const char *name;
>> + uint32_t mode;
>> +};
>> +
>> +struct pmu_mode pmu_modes[] = {
>> + {"enable", XENPMU_MODE_ON},
>> + {"priv_enable", XENPMU_MODE_PRIV},
>> + {"disable", 0}
>> +};
>
> If there a better, more description set of options here? How about:
>
> "self", "all", "none"?
Yes, that's better ("self" is in fact both guest and hypervisor for dom0
but it's probably OK).
I'll need to rename macros as well.
-boris
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 2/6] xen/PMU: Sysfs interface for setting Xen PMU mode
2014-06-06 17:44 ` [PATCH v2 2/6] xen/PMU: Sysfs interface for setting Xen PMU mode Boris Ostrovsky
2014-06-06 20:19 ` Konrad Rzeszutek Wilk
2014-06-10 13:48 ` David Vrabel
@ 2014-06-11 10:13 ` Dietmar Hahn
2014-06-11 12:53 ` Boris Ostrovsky
2 siblings, 1 reply; 30+ messages in thread
From: Dietmar Hahn @ 2014-06-11 10:13 UTC (permalink / raw)
To: xen-devel; +Cc: kevin.tian, Boris Ostrovsky, david.vrabel, JBeulich
Am Freitag 06 Juni 2014, 13:44:42 schrieb Boris Ostrovsky:
> Set Xen's PMU mode via /sys/hypervisor/pmu/pmu_mode. Add XENPMU hypercall.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> ---
> arch/x86/include/asm/xen/hypercall.h | 6 ++
> arch/x86/xen/xen-head.S | 5 +-
> drivers/xen/sys-hypervisor.c | 119 +++++++++++++++++++++++++++++++++++
> include/xen/interface/xen.h | 1 +
> include/xen/interface/xenpmu.h | 49 +++++++++++++++
> 5 files changed, 179 insertions(+), 1 deletion(-)
> create mode 100644 include/xen/interface/xenpmu.h
>
> diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
> index e709884..f022cef 100644
> --- a/arch/x86/include/asm/xen/hypercall.h
> +++ b/arch/x86/include/asm/xen/hypercall.h
> @@ -465,6 +465,12 @@ HYPERVISOR_tmem_op(
> return _hypercall1(int, tmem_op, op);
> }
>
> +static inline int
> +HYPERVISOR_xenpmu_op(unsigned int op, void *arg)
> +{
> + return _hypercall2(int, xenpmu_op, op, arg);
> +}
> +
> static inline void
> MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
> {
> diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
> index 485b695..628852f 100644
> --- a/arch/x86/xen/xen-head.S
> +++ b/arch/x86/xen/xen-head.S
> @@ -93,8 +93,11 @@ NEXT_HYPERCALL(sysctl)
> NEXT_HYPERCALL(domctl)
> NEXT_HYPERCALL(kexec_op)
> NEXT_HYPERCALL(tmem_op) /* 38 */
> +ENTRY(xenclient_rsvd)
> + .skip 32
> +NEXT_HYPERCALL(xenpmu_op) /* 40 */
> ENTRY(xen_hypercall_rsvr)
> - .skip 320
> + .skip 256
> NEXT_HYPERCALL(mca) /* 48 */
> NEXT_HYPERCALL(arch_1)
> NEXT_HYPERCALL(arch_2)
> diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
> index 96453f8..b37ae2d 100644
> --- a/drivers/xen/sys-hypervisor.c
> +++ b/drivers/xen/sys-hypervisor.c
> @@ -20,6 +20,7 @@
> #include <xen/xenbus.h>
> #include <xen/interface/xen.h>
> #include <xen/interface/version.h>
> +#include <xen/interface/xenpmu.h>
>
> #define HYPERVISOR_ATTR_RO(_name) \
> static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name)
> @@ -368,6 +369,116 @@ static void xen_properties_destroy(void)
> sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
> }
>
> +struct pmu_mode {
> + const char *name;
> + uint32_t mode;
> +};
> +
> +struct pmu_mode pmu_modes[] = {
> + {"enable", XENPMU_MODE_ON},
> + {"priv_enable", XENPMU_MODE_PRIV},
> + {"disable", 0}
> +};
> +
> +static ssize_t pmu_mode_store(struct hyp_sysfs_attr *attr,
> + const char *buffer, size_t len)
> +{
> + int ret;
> + struct xen_pmu_params xp;
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(pmu_modes); i++) {
> + if (!strncmp(buffer, pmu_modes[i].name,
> + strlen(pmu_modes[i].name))) {
> + xp.val = pmu_modes[i].mode;
> + break;
> + }
> + }
> +
> + if (i == ARRAY_SIZE(pmu_modes))
> + return -EINVAL;
> +
> + ret = HYPERVISOR_xenpmu_op(XENPMU_mode_set, &xp);
> + if (ret)
> + return ret;
> +
> + return len;
> +}
> +
> +static ssize_t pmu_mode_show(struct hyp_sysfs_attr *attr, char *buffer)
> +{
> + int ret;
> + struct xen_pmu_params xp;
> + int i;
> + uint32_t mode;
> +
> + ret = HYPERVISOR_xenpmu_op(XENPMU_mode_get, &xp);
> + if (ret)
> + return ret;
> +
> + mode = (uint32_t)xp.val;
> + for (i = 0; i < ARRAY_SIZE(pmu_modes); i++) {
> + if (mode == pmu_modes[i].mode)
> + return sprintf(buffer, "%s\n", pmu_modes[i].name);
> + }
> +
> + return -EINVAL;
> +}
> +HYPERVISOR_ATTR_RW(pmu_mode);
> +
> +static ssize_t pmu_features_store(struct hyp_sysfs_attr *attr,
> + const char *buffer, size_t len)
> +{
> + int ret;
> + uint32_t features;
> + struct xen_pmu_params xp;
> +
> + ret = kstrtou32(buffer, 0, &features);
> + if (ret)
> + return ret;
> +
> + xp.val = features;
> + ret = HYPERVISOR_xenpmu_op(XENPMU_feature_set, &xp);
> + if (ret)
> + return ret;
> +
> + return len;
> +}
> +
> +static ssize_t pmu_features_show(struct hyp_sysfs_attr *attr, char *buffer)
> +{
> + int ret;
> + struct xen_pmu_params xp;
> +
> + ret = HYPERVISOR_xenpmu_op(XENPMU_feature_get, &xp);
> + if (ret)
> + return ret;
> +
> + return sprintf(buffer, "0x%x\n", (uint32_t)xp.val);
> +}
> +HYPERVISOR_ATTR_RW(pmu_features);
> +
> +static struct attribute *xen_pmu_attrs[] = {
> + &pmu_mode_attr.attr,
> + &pmu_features_attr.attr,
> + NULL
> +};
> +
> +static const struct attribute_group xen_pmu_group = {
> + .name = "pmu",
> + .attrs = xen_pmu_attrs,
> +};
> +
> +static int __init xen_pmu_init(void)
> +{
> + return sysfs_create_group(hypervisor_kobj, &xen_pmu_group);
> +}
> +
> +static void xen_pmu_destroy(void)
> +{
> + sysfs_remove_group(hypervisor_kobj, &xen_pmu_group);
> +}
> +
> static int __init hyper_sysfs_init(void)
> {
> int ret;
> @@ -390,9 +501,16 @@ static int __init hyper_sysfs_init(void)
> ret = xen_properties_init();
> if (ret)
> goto prop_out;
> + if (xen_initial_domain()) {
> + ret = xen_pmu_init();
> + if (ret)
> + goto pmu_out;
> + }
>
> goto out;
>
> +pmu_out:
> + xen_properties_destroy();
> prop_out:
> xen_sysfs_uuid_destroy();
> uuid_out:
> @@ -407,6 +525,7 @@ out:
>
> static void __exit hyper_sysfs_exit(void)
> {
> + xen_pmu_destroy();
> xen_properties_destroy();
> xen_compilation_destroy();
> xen_sysfs_uuid_destroy();
> diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
> index 0cd5ca3..d071e4a 100644
> --- a/include/xen/interface/xen.h
> +++ b/include/xen/interface/xen.h
> @@ -58,6 +58,7 @@
> #define __HYPERVISOR_physdev_op 33
> #define __HYPERVISOR_hvm_op 34
> #define __HYPERVISOR_tmem_op 38
> +#define __HYPERVISOR_xenpmu_op 40
>
> /* Architecture-specific hypercall definitions. */
> #define __HYPERVISOR_arch_0 48
> diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h
> new file mode 100644
> index 0000000..0d15d3a
> --- /dev/null
> +++ b/include/xen/interface/xenpmu.h
> @@ -0,0 +1,49 @@
> +#ifndef __XEN_PUBLIC_XENPMU_H__
> +#define __XEN_PUBLIC_XENPMU_H__
> +
> +#include "xen.h"
> +
> +#define XENPMU_VER_MAJ 0
> +#define XENPMU_VER_MIN 1
> +
> +/* HYPERVISOR_xenpmu_op commands */
> +#define XENPMU_mode_get 0
> +#define XENPMU_mode_set 1
> +#define XENPMU_feature_get 2
> +#define XENPMU_feature_set 3
> +
> +/* Parameter structure for HYPERVISOR_xenpmu_op call */
> +struct xen_pmu_params {
> + union {
> + struct version {
> + uint8_t maj;
> + uint8_t min;
> + } version;
> + uint64_t pad;
> + };
> + union {
> + uint64_t val;
> + GUEST_HANDLE(void) valp;
> + };
> +};
> +
> +/* PMU modes:
> + * - XENPMU_MODE_OFF: No PMU virtualization
> + * - XENPMU_MODE_ON: Guests can profile themselves, dom0 profiles
> + * itself and Xen
> + * - XENPMU_MODE_PRIV: Only dom0 has access to VPMU and it profiles
> + * everyone: itself, the hypervisor and the guests.
> + */
> +#define XENPMU_MODE_OFF 0
> +#define XENPMU_MODE_ON (1<<0)
> +#define XENPMU_MODE_PRIV (1<<1)
> +
> +
> +/*
> + * PMU features:
> + * - XENPMU_FEATURE_INTEL_BTS: Intel BTS support (ignored on AMD)
> + */
> +#define XENPMU_FEATURE_INTEL_BTS 1
Is this XENPMU_FEATURE_INTEL_BTS really needed on the the linux side?
I would think it's a hypervisor internal feature.
Dietmar.
> +
> +#endif /* __XEN_PUBLIC_XENPMU_H__ */
>
--
Company details: http://ts.fujitsu.com/imprint.html
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 2/6] xen/PMU: Sysfs interface for setting Xen PMU mode
2014-06-11 10:13 ` Dietmar Hahn
@ 2014-06-11 12:53 ` Boris Ostrovsky
2014-06-11 13:12 ` Dietmar Hahn
0 siblings, 1 reply; 30+ messages in thread
From: Boris Ostrovsky @ 2014-06-11 12:53 UTC (permalink / raw)
To: Dietmar Hahn; +Cc: kevin.tian, david.vrabel, JBeulich, xen-devel
On 06/11/2014 06:13 AM, Dietmar Hahn wrote:
>
> +
> +
> +/*
> + * PMU features:
> + * - XENPMU_FEATURE_INTEL_BTS: Intel BTS support (ignored on AMD)
> + */
> +#define XENPMU_FEATURE_INTEL_BTS 1
> Is this XENPMU_FEATURE_INTEL_BTS really needed on the the linux side?
> I would think it's a hypervisor internal feature.
Features are controlled (enabled/disabled) by dom0 via
/sys/hypervisor/pmu/pmu_features so we need to have dom0 and the
hypervisor agree on what the bits mean.
-boris
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 2/6] xen/PMU: Sysfs interface for setting Xen PMU mode
2014-06-11 12:53 ` Boris Ostrovsky
@ 2014-06-11 13:12 ` Dietmar Hahn
0 siblings, 0 replies; 30+ messages in thread
From: Dietmar Hahn @ 2014-06-11 13:12 UTC (permalink / raw)
To: xen-devel; +Cc: Boris Ostrovsky, kevin.tian, JBeulich, david.vrabel
Am Mittwoch 11 Juni 2014, 08:53:13 schrieb Boris Ostrovsky:
> On 06/11/2014 06:13 AM, Dietmar Hahn wrote:
> >
> > +
> > +
> > +/*
> > + * PMU features:
> > + * - XENPMU_FEATURE_INTEL_BTS: Intel BTS support (ignored on AMD)
> > + */
> > +#define XENPMU_FEATURE_INTEL_BTS 1
> > Is this XENPMU_FEATURE_INTEL_BTS really needed on the the linux side?
> > I would think it's a hypervisor internal feature.
>
>
> Features are controlled (enabled/disabled) by dom0 via
> /sys/hypervisor/pmu/pmu_features so we need to have dom0 and the
> hypervisor agree on what the bits mean.
OK, I understand. Then I'll find it in the user land part.
Dietmar.
>
> -boris
>
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH v2 3/6] xen/PMU: Initialization code for Xen PMU
2014-06-06 17:44 [PATCH v2 0/6] xen/PMU: PMU support for Xen PV guests Boris Ostrovsky
2014-06-06 17:44 ` [PATCH v2 1/6] xen: xensyms support Boris Ostrovsky
2014-06-06 17:44 ` [PATCH v2 2/6] xen/PMU: Sysfs interface for setting Xen PMU mode Boris Ostrovsky
@ 2014-06-06 17:44 ` Boris Ostrovsky
2014-06-06 18:57 ` Andrew Cooper
2014-06-06 17:44 ` [PATCH v2 4/6] xen/PMU: Describe vendor-specific PMU registers Boris Ostrovsky
` (3 subsequent siblings)
6 siblings, 1 reply; 30+ messages in thread
From: Boris Ostrovsky @ 2014-06-06 17:44 UTC (permalink / raw)
To: david.vrabel, konrad.wilk
Cc: boris.ostrovsky, kevin.tian, dietmar.hahn, JBeulich, xen-devel
Map shared data structure that will hold CPU registers, VPMU context, V/PCPU IDs
of the CPU interrupted by PMU interrupt. Hypervisor fills this information in
its handler and passes it to the guest for further processing.
Set up PMU VIRQ.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
arch/x86/include/asm/xen/interface.h | 41 ++++++++++
arch/x86/xen/Makefile | 2 +-
arch/x86/xen/pmu.c | 146 +++++++++++++++++++++++++++++++++++
arch/x86/xen/pmu.h | 11 +++
arch/x86/xen/smp.c | 31 +++++++-
include/xen/interface/xen.h | 1 +
include/xen/interface/xenpmu.h | 19 +++++
7 files changed, 249 insertions(+), 2 deletions(-)
create mode 100644 arch/x86/xen/pmu.c
create mode 100644 arch/x86/xen/pmu.h
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index fd9cb76..c4b92d3 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -169,6 +169,47 @@ struct vcpu_guest_context {
#endif
};
DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
+
+/* AMD PMU registers and structures */
+struct xen_pmu_amd_ctxt {
+ uint32_t counters; /* Offset to counter MSRs */
+ uint32_t ctrls; /* Offset to control MSRs */
+};
+
+/* Intel PMU registers and structures */
+struct xen_pmu_cntr_pair {
+ uint64_t counter;
+ uint64_t control;
+};
+
+struct xen_pmu_intel_ctxt {
+ uint64_t global_ctrl;
+ uint64_t global_ovf_ctrl;
+ uint64_t global_status;
+ uint64_t fixed_ctrl;
+ uint64_t ds_area;
+ uint64_t pebs_enable;
+ uint64_t debugctl;
+ uint32_t fixed_counters; /* Offset to fixed counter MSRs */
+ uint32_t arch_counters; /* Offset to architectural counter MSRs */
+};
+
+struct xen_arch_pmu {
+ union {
+ struct cpu_user_regs regs;
+ uint8_t pad1[256];
+ };
+ union {
+ uint32_t lapic_lvtpc;
+ uint64_t pad2;
+ };
+ union {
+ struct xen_pmu_amd_ctxt amd;
+ struct xen_pmu_intel_ctxt intel;
+#define XENPMU_CTXT_PAD_SZ 128
+ uint8_t pad3[XENPMU_CTXT_PAD_SZ];
+ };
+};
#endif /* !__ASSEMBLY__ */
/*
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 96ab2c0..b187df5 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -13,7 +13,7 @@ CFLAGS_mmu.o := $(nostackp)
obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
time.o xen-asm.o xen-asm_$(BITS).o \
grant-table.o suspend.o platform-pci-unplug.o \
- p2m.o
+ p2m.o pmu.o
obj-$(CONFIG_EVENT_TRACING) += trace.o
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
new file mode 100644
index 0000000..65c3767
--- /dev/null
+++ b/arch/x86/xen/pmu.c
@@ -0,0 +1,146 @@
+#include <linux/types.h>
+#include <linux/interrupt.h>
+
+#include <asm/xen/hypercall.h>
+#include <xen/page.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
+#include <xen/interface/xenpmu.h>
+
+#include "xen-ops.h"
+#include "pmu.h"
+
+/* x86_pmu.handle_irq definition */
+#include <../kernel/cpu/perf_event.h>
+
+
+/* Shared page between hypervisor and domain */
+DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared);
+#define get_xenpmu_data() per_cpu(xenpmu_shared, smp_processor_id());
+
+/* perf callbacks*/
+int xen_is_in_guest(void)
+{
+ struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+ if (!xenpmu_data) {
+ WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
+ return 0;
+ }
+
+ if (!xen_initial_domain() ||
+ xenpmu_data->domain_id > DOMID_SELF || xenpmu_data->domain_id == 0)
+ return 0;
+
+ return 1;
+}
+
+static int xen_is_user_mode(void)
+{
+ struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+ if (!xenpmu_data) {
+ WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
+ return 0;
+ }
+
+ return ((xenpmu_data->pmu.regs.cs & 3) == 3);
+}
+
+static unsigned long xen_get_guest_ip(void)
+{
+ struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+ if (!xenpmu_data) {
+ WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
+ return 0;
+ }
+
+ return xenpmu_data->pmu.regs.eip;
+}
+
+static struct perf_guest_info_callbacks xen_guest_cbs = {
+ .is_in_guest = xen_is_in_guest,
+ .is_user_mode = xen_is_user_mode,
+ .get_guest_ip = xen_get_guest_ip,
+};
+
+/* Convert registers from Xen's format to Linux' */
+static void xen_convert_regs(struct cpu_user_regs *xen_regs,
+ struct pt_regs *regs)
+{
+ regs->ip = xen_regs->eip;
+ regs->cs = xen_regs->cs;
+}
+
+irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
+{
+ int ret = IRQ_NONE;
+ struct pt_regs regs;
+ struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+ if (!xenpmu_data) {
+ WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
+ return ret;
+ }
+
+ xen_convert_regs(&xenpmu_data->pmu.regs, ®s);
+ if (x86_pmu.handle_irq(®s))
+ ret = IRQ_HANDLED;
+
+ return ret;
+}
+
+bool is_xen_pmu(int cpu)
+{
+ return (per_cpu(xenpmu_shared, cpu) != NULL);
+}
+
+int xen_pmu_init(int cpu)
+{
+ int ret = 0;
+ struct xen_pmu_params xp;
+ unsigned long pfn;
+ struct xen_pmu_data *xenpmu_data;
+
+ BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);
+ xenpmu_data = vzalloc(PAGE_SIZE);
+ if (!xenpmu_data) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ pfn = vmalloc_to_pfn((char *)xenpmu_data);
+
+ xp.val = pfn_to_mfn(pfn);
+ xp.vcpu = cpu;
+ xp.version.maj = XENPMU_VER_MAJ;
+ xp.version.min = XENPMU_VER_MIN;
+ ret = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
+ if (ret)
+ goto fail;
+
+ per_cpu(xenpmu_shared, cpu) = xenpmu_data;
+
+ if (cpu == 0)
+ perf_register_guest_info_callbacks(&xen_guest_cbs);
+
+ return ret;
+
+fail:
+ vfree(xenpmu_data);
+ return ret;
+}
+
+void xen_pmu_finish(int cpu)
+{
+ struct xen_pmu_params xp;
+
+ xp.vcpu = cpu;
+ xp.version.maj = XENPMU_VER_MAJ;
+ xp.version.min = XENPMU_VER_MIN;
+
+ (void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);
+
+ vfree(per_cpu(xenpmu_shared, cpu));
+ per_cpu(xenpmu_shared, cpu) = NULL;
+}
diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h
new file mode 100644
index 0000000..d52e8db
--- /dev/null
+++ b/arch/x86/xen/pmu.h
@@ -0,0 +1,11 @@
+#ifndef __XEN_PMU_H
+#define __XEN_PMU_H
+
+#include <xen/interface/xenpmu.h>
+
+irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
+int xen_pmu_init(int cpu);
+void xen_pmu_finish(int cpu);
+bool is_xen_pmu(int cpu);
+
+#endif /* __XEN_PMU_H */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 7005974..7ea6296 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -26,6 +26,7 @@
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
+#include <xen/interface/xenpmu.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
@@ -37,6 +38,7 @@
#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "mmu.h"
+#include "pmu.h"
cpumask_var_t xen_cpu_initialized_map;
@@ -49,6 +51,7 @@ static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 };
+static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
@@ -147,11 +150,18 @@ static void xen_smp_intr_free(unsigned int cpu)
kfree(per_cpu(xen_irq_work, cpu).name);
per_cpu(xen_irq_work, cpu).name = NULL;
}
+
+ if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
+ unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
+ per_cpu(xen_pmu_irq, cpu).irq = -1;
+ kfree(per_cpu(xen_pmu_irq, cpu).name);
+ per_cpu(xen_pmu_irq, cpu).name = NULL;
+ }
};
static int xen_smp_intr_init(unsigned int cpu)
{
int rc;
- char *resched_name, *callfunc_name, *debug_name;
+ char *resched_name, *callfunc_name, *debug_name, *pmu_name;
resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
@@ -217,6 +227,18 @@ static int xen_smp_intr_init(unsigned int cpu)
per_cpu(xen_irq_work, cpu).irq = rc;
per_cpu(xen_irq_work, cpu).name = callfunc_name;
+ if (is_xen_pmu(cpu)) {
+ pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
+ rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
+ xen_pmu_irq_handler,
+ IRQF_PERCPU|IRQF_NOBALANCING,
+ pmu_name, NULL);
+ if (rc < 0)
+ goto fail;
+ per_cpu(xen_pmu_irq, cpu).irq = rc;
+ per_cpu(xen_pmu_irq, cpu).name = pmu_name;
+ }
+
return 0;
fail:
@@ -334,6 +356,9 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
}
set_cpu_sibling_map(0);
+ if (xen_pmu_init(0))
+ pr_err("Could not initialize VPMU for VCPU 0\n");
+
if (xen_smp_intr_init(0))
BUG();
@@ -463,6 +488,9 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
+ if (xen_pmu_init(cpu))
+ pr_err("Could not initialize VPMU for VCPU %u\n", cpu);
+
rc = xen_smp_intr_init(cpu);
if (rc)
return rc;
@@ -504,6 +532,7 @@ static void xen_cpu_die(unsigned int cpu)
xen_smp_intr_free(cpu);
xen_uninit_lock_cpu(cpu);
xen_teardown_timer(cpu);
+ xen_pmu_finish(cpu);
}
static void xen_play_dead(void) /* used only with HOTPLUG_CPU */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index d071e4a..f48f584 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -81,6 +81,7 @@
#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */
#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */
#define VIRQ_PCPU_STATE 9 /* (DOM0) PCPU state changed */
+#define VIRQ_XENPMU 13 /* PMC interrupt */
/* Architecture-specific VIRQ definitions. */
#define VIRQ_ARCH_0 16
diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h
index 0d15d3a..ed00245 100644
--- a/include/xen/interface/xenpmu.h
+++ b/include/xen/interface/xenpmu.h
@@ -11,6 +11,8 @@
#define XENPMU_mode_set 1
#define XENPMU_feature_get 2
#define XENPMU_feature_set 3
+#define XENPMU_init 4
+#define XENPMU_finish 5
/* Parameter structure for HYPERVISOR_xenpmu_op call */
struct xen_pmu_params {
@@ -25,6 +27,8 @@ struct xen_pmu_params {
uint64_t val;
GUEST_HANDLE(void) valp;
};
+
+ uint64_t vcpu;
};
/* PMU modes:
@@ -45,5 +49,20 @@ struct xen_pmu_params {
*/
#define XENPMU_FEATURE_INTEL_BTS 1
+/*
+ * PMU MSRs are cached in the context so the PV guest doesn't need to trap to
+ * the hypervisor
+ */
+#define PMU_CACHED 1
+
+/* Shared between hypervisor and PV domain */
+struct xen_pmu_data {
+ uint32_t domain_id;
+ uint32_t vcpu_id;
+ uint32_t pcpu_id;
+ uint32_t pmu_flags;
+
+ struct xen_arch_pmu pmu;
+};
#endif /* __XEN_PUBLIC_XENPMU_H__ */
--
1.8.1.4
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH v2 3/6] xen/PMU: Initialization code for Xen PMU
2014-06-06 17:44 ` [PATCH v2 3/6] xen/PMU: Initialization code for Xen PMU Boris Ostrovsky
@ 2014-06-06 18:57 ` Andrew Cooper
2014-06-06 19:51 ` Boris Ostrovsky
0 siblings, 1 reply; 30+ messages in thread
From: Andrew Cooper @ 2014-06-06 18:57 UTC (permalink / raw)
To: Boris Ostrovsky
Cc: kevin.tian, dietmar.hahn, xen-devel, david.vrabel, JBeulich
On 06/06/14 18:44, Boris Ostrovsky wrote:
> Map shared data structure that will hold CPU registers, VPMU context, V/PCPU IDs
> of the CPU interrupted by PMU interrupt. Hypervisor fills this information in
> its handler and passes it to the guest for further processing.
>
> Set up PMU VIRQ.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> ---
> arch/x86/include/asm/xen/interface.h | 41 ++++++++++
> arch/x86/xen/Makefile | 2 +-
> arch/x86/xen/pmu.c | 146 +++++++++++++++++++++++++++++++++++
> arch/x86/xen/pmu.h | 11 +++
> arch/x86/xen/smp.c | 31 +++++++-
> include/xen/interface/xen.h | 1 +
> include/xen/interface/xenpmu.h | 19 +++++
> 7 files changed, 249 insertions(+), 2 deletions(-)
> create mode 100644 arch/x86/xen/pmu.c
> create mode 100644 arch/x86/xen/pmu.h
>
> diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
> index fd9cb76..c4b92d3 100644
> --- a/arch/x86/include/asm/xen/interface.h
> +++ b/arch/x86/include/asm/xen/interface.h
> @@ -169,6 +169,47 @@ struct vcpu_guest_context {
> #endif
> };
> DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
> +
> +/* AMD PMU registers and structures */
> +struct xen_pmu_amd_ctxt {
> + uint32_t counters; /* Offset to counter MSRs */
> + uint32_t ctrls; /* Offset to control MSRs */
> +};
> +
> +/* Intel PMU registers and structures */
> +struct xen_pmu_cntr_pair {
> + uint64_t counter;
> + uint64_t control;
> +};
> +
> +struct xen_pmu_intel_ctxt {
> + uint64_t global_ctrl;
> + uint64_t global_ovf_ctrl;
> + uint64_t global_status;
> + uint64_t fixed_ctrl;
> + uint64_t ds_area;
> + uint64_t pebs_enable;
> + uint64_t debugctl;
> + uint32_t fixed_counters; /* Offset to fixed counter MSRs */
> + uint32_t arch_counters; /* Offset to architectural counter MSRs */
> +};
> +
> +struct xen_arch_pmu {
> + union {
> + struct cpu_user_regs regs;
> + uint8_t pad1[256];
> + };
> + union {
> + uint32_t lapic_lvtpc;
> + uint64_t pad2;
> + };
> + union {
> + struct xen_pmu_amd_ctxt amd;
> + struct xen_pmu_intel_ctxt intel;
> +#define XENPMU_CTXT_PAD_SZ 128
> + uint8_t pad3[XENPMU_CTXT_PAD_SZ];
> + };
> +};
> #endif /* !__ASSEMBLY__ */
You appear to have a define for XENPMU_CTXT_PAD_SZ but not for any other
bits of padding.
Also, I presume there is no sensible way to coalesce the Intel and AMD
variations?
>
> /*
> diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
> index 96ab2c0..b187df5 100644
> --- a/arch/x86/xen/Makefile
> +++ b/arch/x86/xen/Makefile
> @@ -13,7 +13,7 @@ CFLAGS_mmu.o := $(nostackp)
> obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
> time.o xen-asm.o xen-asm_$(BITS).o \
> grant-table.o suspend.o platform-pci-unplug.o \
> - p2m.o
> + p2m.o pmu.o
>
> obj-$(CONFIG_EVENT_TRACING) += trace.o
>
> diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
> new file mode 100644
> index 0000000..65c3767
> --- /dev/null
> +++ b/arch/x86/xen/pmu.c
> @@ -0,0 +1,146 @@
> +#include <linux/types.h>
> +#include <linux/interrupt.h>
> +
> +#include <asm/xen/hypercall.h>
> +#include <xen/page.h>
> +#include <xen/interface/xen.h>
> +#include <xen/interface/vcpu.h>
> +#include <xen/interface/xenpmu.h>
> +
> +#include "xen-ops.h"
> +#include "pmu.h"
> +
> +/* x86_pmu.handle_irq definition */
> +#include <../kernel/cpu/perf_event.h>
System include with relative path?
> +
> +
> +/* Shared page between hypervisor and domain */
> +DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared);
> +#define get_xenpmu_data() per_cpu(xenpmu_shared, smp_processor_id());
> +
> +/* perf callbacks*/
> +int xen_is_in_guest(void)
> +{
> + struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
const
> +
> + if (!xenpmu_data) {
> + WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
> + return 0;
> + }
> +
> + if (!xen_initial_domain() ||
> + xenpmu_data->domain_id > DOMID_SELF || xenpmu_data->domain_id == 0)
> + return 0;
Why is dom0 special, and is it sensible to hard code a 0 here?
> +
> + return 1;
> +}
> +
> +static int xen_is_user_mode(void)
> +{
> + struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
const
> +
> + if (!xenpmu_data) {
> + WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
> + return 0;
> + }
> +
> + return ((xenpmu_data->pmu.regs.cs & 3) == 3);
> +}
> +
> +static unsigned long xen_get_guest_ip(void)
> +{
> + struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
const
> +
> + if (!xenpmu_data) {
> + WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
> + return 0;
> + }
> +
> + return xenpmu_data->pmu.regs.eip;
> +}
> +
> +static struct perf_guest_info_callbacks xen_guest_cbs = {
> + .is_in_guest = xen_is_in_guest,
> + .is_user_mode = xen_is_user_mode,
> + .get_guest_ip = xen_get_guest_ip,
> +};
> +
> +/* Convert registers from Xen's format to Linux' */
> +static void xen_convert_regs(struct cpu_user_regs *xen_regs,
> + struct pt_regs *regs)
const xen_regs
> +{
> + regs->ip = xen_regs->eip;
> + regs->cs = xen_regs->cs;
> +}
> +
> +irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
> +{
> + int ret = IRQ_NONE;
> + struct pt_regs regs;
> + struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
const
~Andrew
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 3/6] xen/PMU: Initialization code for Xen PMU
2014-06-06 18:57 ` Andrew Cooper
@ 2014-06-06 19:51 ` Boris Ostrovsky
0 siblings, 0 replies; 30+ messages in thread
From: Boris Ostrovsky @ 2014-06-06 19:51 UTC (permalink / raw)
To: Andrew Cooper; +Cc: kevin.tian, dietmar.hahn, xen-devel, david.vrabel, JBeulich
On 06/06/2014 02:57 PM, Andrew Cooper wrote:
> On 06/06/14 18:44, Boris Ostrovsky wrote:
>> Map shared data structure that will hold CPU registers, VPMU context, V/PCPU IDs
>> of the CPU interrupted by PMU interrupt. Hypervisor fills this information in
>> its handler and passes it to the guest for further processing.
>>
>> Set up PMU VIRQ.
>>
>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>> ---
>> arch/x86/include/asm/xen/interface.h | 41 ++++++++++
>> arch/x86/xen/Makefile | 2 +-
>> arch/x86/xen/pmu.c | 146 +++++++++++++++++++++++++++++++++++
>> arch/x86/xen/pmu.h | 11 +++
>> arch/x86/xen/smp.c | 31 +++++++-
>> include/xen/interface/xen.h | 1 +
>> include/xen/interface/xenpmu.h | 19 +++++
>> 7 files changed, 249 insertions(+), 2 deletions(-)
>> create mode 100644 arch/x86/xen/pmu.c
>> create mode 100644 arch/x86/xen/pmu.h
>>
>> diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
>> index fd9cb76..c4b92d3 100644
>> --- a/arch/x86/include/asm/xen/interface.h
>> +++ b/arch/x86/include/asm/xen/interface.h
>> @@ -169,6 +169,47 @@ struct vcpu_guest_context {
>> #endif
>> };
>> DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
>> +
>> +/* AMD PMU registers and structures */
>> +struct xen_pmu_amd_ctxt {
>> + uint32_t counters; /* Offset to counter MSRs */
>> + uint32_t ctrls; /* Offset to control MSRs */
>> +};
>> +
>> +/* Intel PMU registers and structures */
>> +struct xen_pmu_cntr_pair {
>> + uint64_t counter;
>> + uint64_t control;
>> +};
>> +
>> +struct xen_pmu_intel_ctxt {
>> + uint64_t global_ctrl;
>> + uint64_t global_ovf_ctrl;
>> + uint64_t global_status;
>> + uint64_t fixed_ctrl;
>> + uint64_t ds_area;
>> + uint64_t pebs_enable;
>> + uint64_t debugctl;
>> + uint32_t fixed_counters; /* Offset to fixed counter MSRs */
>> + uint32_t arch_counters; /* Offset to architectural counter MSRs */
>> +};
>> +
>> +struct xen_arch_pmu {
>> + union {
>> + struct cpu_user_regs regs;
>> + uint8_t pad1[256];
>> + };
>> + union {
>> + uint32_t lapic_lvtpc;
>> + uint64_t pad2;
>> + };
>> + union {
>> + struct xen_pmu_amd_ctxt amd;
>> + struct xen_pmu_intel_ctxt intel;
>> +#define XENPMU_CTXT_PAD_SZ 128
>> + uint8_t pad3[XENPMU_CTXT_PAD_SZ];
>> + };
>> +};
>> #endif /* !__ASSEMBLY__ */
> You appear to have a define for XENPMU_CTXT_PAD_SZ but not for any other
> bits of padding.
The only reason for the define is because of the BUILD_BUG_ON test (in
hypervisor). I suppose I could add another define for registers and do a
similar test.
>
> Also, I presume there is no sensible way to coalesce the Intel and AMD
> variations?
Oh no. They are *completely* different implementations, with absolutely
nothing in common.
-boris
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH v2 4/6] xen/PMU: Describe vendor-specific PMU registers
2014-06-06 17:44 [PATCH v2 0/6] xen/PMU: PMU support for Xen PV guests Boris Ostrovsky
` (2 preceding siblings ...)
2014-06-06 17:44 ` [PATCH v2 3/6] xen/PMU: Initialization code for Xen PMU Boris Ostrovsky
@ 2014-06-06 17:44 ` Boris Ostrovsky
2014-06-10 14:11 ` David Vrabel
2014-06-06 17:44 ` [PATCH v2 5/6] xen/PMU: Intercept PMU-related MSR and APIC accesses Boris Ostrovsky
` (2 subsequent siblings)
6 siblings, 1 reply; 30+ messages in thread
From: Boris Ostrovsky @ 2014-06-06 17:44 UTC (permalink / raw)
To: david.vrabel, konrad.wilk
Cc: boris.ostrovsky, kevin.tian, dietmar.hahn, JBeulich, xen-devel
AMD and Intel PMU register initialization and helpers that determine whether a
register belongs to PMU.
This and some of subsequent PMU emulation code is somewhat similar to Xen's PMU
implementation.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
arch/x86/xen/pmu.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 145 insertions(+), 1 deletion(-)
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 65c3767..f1bec27 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -18,6 +18,148 @@
DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared);
#define get_xenpmu_data() per_cpu(xenpmu_shared, smp_processor_id());
+
+/* AMD PMU */
+#define F15H_NUM_COUNTERS 6
+#define F10H_NUM_COUNTERS 4
+
+static __read_mostly uint32_t amd_counters_base;
+static __read_mostly uint32_t amd_ctrls_base;
+static __read_mostly int amd_msr_step;
+static __read_mostly int k7_counters_mirrored;
+static __read_mostly int amd_num_counters;
+
+/* Intel PMU */
+#define MSR_TYPE_COUNTER 0
+#define MSR_TYPE_CTRL 1
+#define MSR_TYPE_GLOBAL 2
+#define MSR_TYPE_ARCH_COUNTER 3
+#define MSR_TYPE_ARCH_CTRL 4
+
+/* Number of general pmu registers (CPUID.EAX[0xa].EAX[8..15]) */
+#define PMU_GENERAL_NR_SHIFT 8
+#define PMU_GENERAL_NR_BITS 8
+#define PMU_GENERAL_NR_MASK (((1 << PMU_GENERAL_NR_BITS) - 1) \
+ << PMU_GENERAL_NR_SHIFT)
+
+/* Number of fixed pmu registers (CPUID.EDX[0xa].EDX[0..4]) */
+#define PMU_FIXED_NR_SHIFT 0
+#define PMU_FIXED_NR_BITS 5
+#define PMU_FIXED_NR_MASK (((1 << PMU_FIXED_NR_BITS) - 1) \
+ << PMU_FIXED_NR_SHIFT)
+
+static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;
+
+
+static void xen_pmu_arch_init(void)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+
+ switch (boot_cpu_data.x86) {
+ case 0x15:
+ amd_num_counters = F15H_NUM_COUNTERS;
+ amd_counters_base = MSR_F15H_PERF_CTR;
+ amd_ctrls_base = MSR_F15H_PERF_CTL;
+ amd_msr_step = 2;
+ k7_counters_mirrored = 1;
+ break;
+ case 0x10:
+ case 0x12:
+ case 0x14:
+ case 0x16:
+ default:
+ amd_num_counters = F10H_NUM_COUNTERS;
+ amd_counters_base = MSR_K7_PERFCTR0;
+ amd_ctrls_base = MSR_K7_EVNTSEL0;
+ amd_msr_step = 1;
+ k7_counters_mirrored = 0;
+ break;
+ }
+ } else {
+ uint32_t eax, ebx, ecx, edx;
+
+ cpuid(0xa, &eax, &ebx, &ecx, &edx);
+
+ intel_num_arch_counters = (eax & PMU_GENERAL_NR_MASK) >>
+ PMU_GENERAL_NR_SHIFT;
+ intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK) >>
+ PMU_FIXED_NR_SHIFT;
+ }
+}
+
+static inline uint32_t get_fam15h_addr(u32 addr)
+{
+ switch (addr) {
+ case MSR_K7_PERFCTR0:
+ case MSR_K7_PERFCTR1:
+ case MSR_K7_PERFCTR2:
+ case MSR_K7_PERFCTR3:
+ return MSR_F15H_PERF_CTR + (addr - MSR_K7_PERFCTR0);
+ case MSR_K7_EVNTSEL0:
+ case MSR_K7_EVNTSEL1:
+ case MSR_K7_EVNTSEL2:
+ case MSR_K7_EVNTSEL3:
+ return MSR_F15H_PERF_CTL + (addr - MSR_K7_EVNTSEL0);
+ default:
+ break;
+ }
+
+ return addr;
+}
+
+static inline bool is_amd_pmu_msr(unsigned int msr)
+{
+ if ((msr >= MSR_F15H_PERF_CTL &&
+ msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
+ (msr >= MSR_K7_EVNTSEL0 &&
+ msr < MSR_K7_PERFCTR0 + amd_num_counters))
+ return true;
+
+ return false;
+}
+
+static bool is_intel_pmu_msr(u32 msr_index, int *type, int *index)
+{
+ int i;
+
+ for (i = 0; i < intel_num_fixed_counters; i++) {
+ if (msr_index == MSR_CORE_PERF_FIXED_CTR0 + i) {
+ *type = MSR_TYPE_COUNTER;
+ *index = i;
+ return true;
+ }
+ }
+
+ if ((msr_index == MSR_CORE_PERF_FIXED_CTR_CTRL) ||
+ (msr_index == MSR_IA32_DS_AREA) ||
+ (msr_index == MSR_IA32_PEBS_ENABLE)) {
+ *type = MSR_TYPE_CTRL;
+ return true;
+ }
+
+ if ((msr_index == MSR_CORE_PERF_GLOBAL_CTRL) ||
+ (msr_index == MSR_CORE_PERF_GLOBAL_STATUS) ||
+ (msr_index == MSR_CORE_PERF_GLOBAL_OVF_CTRL)) {
+ *type = MSR_TYPE_GLOBAL;
+ return true;
+ }
+
+ if ((msr_index >= MSR_IA32_PERFCTR0) &&
+ (msr_index < (MSR_IA32_PERFCTR0 + intel_num_arch_counters))) {
+ *type = MSR_TYPE_ARCH_COUNTER;
+ *index = msr_index - MSR_IA32_PERFCTR0;
+ return true;
+ }
+
+ if ((msr_index >= MSR_P6_EVNTSEL0) &&
+ (msr_index < (MSR_P6_EVNTSEL0 + intel_num_arch_counters))) {
+ *type = MSR_TYPE_ARCH_CTRL;
+ *index = msr_index - MSR_P6_EVNTSEL0;
+ return true;
+ }
+ return false;
+}
+
/* perf callbacks*/
int xen_is_in_guest(void)
{
@@ -121,8 +263,10 @@ int xen_pmu_init(int cpu)
per_cpu(xenpmu_shared, cpu) = xenpmu_data;
- if (cpu == 0)
+ if (cpu == 0) {
perf_register_guest_info_callbacks(&xen_guest_cbs);
+ xen_pmu_arch_init();
+ }
return ret;
--
1.8.1.4
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH v2 4/6] xen/PMU: Describe vendor-specific PMU registers
2014-06-06 17:44 ` [PATCH v2 4/6] xen/PMU: Describe vendor-specific PMU registers Boris Ostrovsky
@ 2014-06-10 14:11 ` David Vrabel
2014-06-10 15:29 ` Boris Ostrovsky
0 siblings, 1 reply; 30+ messages in thread
From: David Vrabel @ 2014-06-10 14:11 UTC (permalink / raw)
To: Boris Ostrovsky, konrad.wilk
Cc: kevin.tian, dietmar.hahn, JBeulich, xen-devel
On 06/06/14 18:44, Boris Ostrovsky wrote:
> AMD and Intel PMU register initialization and helpers that determine whether a
> register belongs to PMU.
>
> This and some of subsequent PMU emulation code is somewhat similar to Xen's PMU
> implementation.
This and patches 5 and 6 look ok but they seem to be split somewhat
arbitrarily. Is it still bisectable as-is?
David
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 4/6] xen/PMU: Describe vendor-specific PMU registers
2014-06-10 14:11 ` David Vrabel
@ 2014-06-10 15:29 ` Boris Ostrovsky
0 siblings, 0 replies; 30+ messages in thread
From: Boris Ostrovsky @ 2014-06-10 15:29 UTC (permalink / raw)
To: David Vrabel; +Cc: kevin.tian, xen-devel, dietmar.hahn, JBeulich
On 06/10/2014 10:11 AM, David Vrabel wrote:
> On 06/06/14 18:44, Boris Ostrovsky wrote:
>> AMD and Intel PMU register initialization and helpers that determine whether a
>> register belongs to PMU.
>>
>> This and some of subsequent PMU emulation code is somewhat similar to Xen's PMU
>> implementation.
> This and patches 5 and 6 look ok but they seem to be split somewhat
> arbitrarily. Is it still bisectable as-is?
I think so. Patch 6 is (well, was) meant to be a performance enhancement.
But I will check whether it is still true.
-boris
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH v2 5/6] xen/PMU: Intercept PMU-related MSR and APIC accesses
2014-06-06 17:44 [PATCH v2 0/6] xen/PMU: PMU support for Xen PV guests Boris Ostrovsky
` (3 preceding siblings ...)
2014-06-06 17:44 ` [PATCH v2 4/6] xen/PMU: Describe vendor-specific PMU registers Boris Ostrovsky
@ 2014-06-06 17:44 ` Boris Ostrovsky
2014-06-12 6:56 ` Dietmar Hahn
2014-06-06 17:44 ` [PATCH v2 6/6] xen/PMU: PMU emulation code Boris Ostrovsky
2014-06-10 13:57 ` [PATCH v2 0/6] xen/PMU: PMU support for Xen PV guests David Vrabel
6 siblings, 1 reply; 30+ messages in thread
From: Boris Ostrovsky @ 2014-06-06 17:44 UTC (permalink / raw)
To: david.vrabel, konrad.wilk
Cc: boris.ostrovsky, kevin.tian, dietmar.hahn, JBeulich, xen-devel
Provide interfaces for recognizing accesses to PMU-related MSRs and LVTPC APIC
and process these accesses in Xen PMU code.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
arch/x86/xen/enlighten.c | 24 ++++++++++--
arch/x86/xen/pmu.c | 84 ++++++++++++++++++++++++++++++++++++++++++
arch/x86/xen/pmu.h | 4 ++
include/xen/interface/xenpmu.h | 1 +
4 files changed, 110 insertions(+), 3 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 201d09a..a3e4db6 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -82,6 +82,7 @@
#include "mmu.h"
#include "smp.h"
#include "multicalls.h"
+#include "pmu.h"
EXPORT_SYMBOL_GPL(hypercall_page);
@@ -964,6 +965,11 @@ static u32 xen_apic_read(u32 reg)
static void xen_apic_write(u32 reg, u32 val)
{
+ if (reg == APIC_LVTPC) {
+ (void)pmu_apic_update(reg);
+ return;
+ }
+
/* Warn to see if there's any stray references */
WARN_ON(1);
}
@@ -1068,6 +1074,17 @@ static inline void xen_write_cr8(unsigned long val)
BUG_ON(val);
}
#endif
+
+static u64 xen_read_msr_safe(unsigned int msr, int *err)
+{
+ u64 val;
+
+ if (pmu_msr_read(msr, &val, err))
+ return val;
+
+ return native_read_msr_safe(msr, err);
+}
+
static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
{
int ret;
@@ -1108,7 +1125,8 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
break;
default:
- ret = native_write_msr_safe(msr, low, high);
+ if (!pmu_msr_write(msr, low, high, &ret))
+ ret = native_write_msr_safe(msr, low, high);
}
return ret;
@@ -1244,11 +1262,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.wbinvd = native_wbinvd,
- .read_msr = native_read_msr_safe,
+ .read_msr = xen_read_msr_safe,
.write_msr = xen_write_msr_safe,
.read_tsc = native_read_tsc,
- .read_pmc = native_read_pmc,
+ .read_pmc = xen_read_pmc,
.read_tscp = native_read_tscp,
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index f1bec27..f92d406 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -160,6 +160,90 @@ static bool is_intel_pmu_msr(u32 msr_index, int *type, int *index)
return false;
}
+bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
+{
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ if (is_amd_pmu_msr(msr)) {
+ *val = native_read_msr_safe(msr, err);
+ return true;
+ }
+ } else {
+ int type, index;
+ if (is_intel_pmu_msr(msr, &type, &index)) {
+ *val = native_read_msr_safe(msr, err);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ if (is_amd_pmu_msr(msr)) {
+ *err = native_write_msr_safe(msr, low, high);
+ return true;
+ }
+ } else {
+ int type, index;
+
+ if (is_intel_pmu_msr(msr, &type, &index)) {
+ *err = native_write_msr_safe(msr, low, high);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned long long xen_amd_read_pmc(int counter)
+{
+ uint32_t msr;
+ int err;
+
+ msr = amd_counters_base + (counter * amd_msr_step);
+ return native_read_msr_safe(msr, &err);
+}
+
+unsigned long long xen_intel_read_pmc(int counter)
+{
+ int err;
+ uint32_t msr;
+
+ if (counter & (1<<30))
+ msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
+ else
+ msr = MSR_IA32_PERFCTR0 + counter;
+
+ return native_read_msr_safe(msr, &err);
+}
+
+unsigned long long xen_read_pmc(int counter)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ return xen_amd_read_pmc(counter);
+ else
+ return xen_intel_read_pmc(counter);
+}
+
+int pmu_apic_update(uint32_t val)
+{
+ int ret;
+ struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+ if (!xenpmu_data) {
+ WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
+ return -EINVAL;
+ }
+
+ xenpmu_data->pmu.lapic_lvtpc = val;
+ ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);
+
+ return ret;
+}
+
/* perf callbacks*/
int xen_is_in_guest(void)
{
diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h
index d52e8db..30bfbcf 100644
--- a/arch/x86/xen/pmu.h
+++ b/arch/x86/xen/pmu.h
@@ -7,5 +7,9 @@ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
int xen_pmu_init(int cpu);
void xen_pmu_finish(int cpu);
bool is_xen_pmu(int cpu);
+bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err);
+bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err);
+int pmu_apic_update(uint32_t reg);
+unsigned long long xen_read_pmc(int counter);
#endif /* __XEN_PMU_H */
diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h
index ed00245..79384e4 100644
--- a/include/xen/interface/xenpmu.h
+++ b/include/xen/interface/xenpmu.h
@@ -13,6 +13,7 @@
#define XENPMU_feature_set 3
#define XENPMU_init 4
#define XENPMU_finish 5
+#define XENPMU_lvtpc_set 6
/* Parameter structure for HYPERVISOR_xenpmu_op call */
struct xen_pmu_params {
--
1.8.1.4
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH v2 5/6] xen/PMU: Intercept PMU-related MSR and APIC accesses
2014-06-06 17:44 ` [PATCH v2 5/6] xen/PMU: Intercept PMU-related MSR and APIC accesses Boris Ostrovsky
@ 2014-06-12 6:56 ` Dietmar Hahn
2014-06-12 14:50 ` Boris Ostrovsky
0 siblings, 1 reply; 30+ messages in thread
From: Dietmar Hahn @ 2014-06-12 6:56 UTC (permalink / raw)
To: xen-devel; +Cc: kevin.tian, Boris Ostrovsky, david.vrabel, JBeulich
Am Freitag 06 Juni 2014, 13:44:45 schrieb Boris Ostrovsky:
> Provide interfaces for recognizing accesses to PMU-related MSRs and LVTPC APIC
> and process these accesses in Xen PMU code.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> ---
> arch/x86/xen/enlighten.c | 24 ++++++++++--
> arch/x86/xen/pmu.c | 84 ++++++++++++++++++++++++++++++++++++++++++
> arch/x86/xen/pmu.h | 4 ++
> include/xen/interface/xenpmu.h | 1 +
> 4 files changed, 110 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index 201d09a..a3e4db6 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -82,6 +82,7 @@
> #include "mmu.h"
> #include "smp.h"
> #include "multicalls.h"
> +#include "pmu.h"
>
> EXPORT_SYMBOL_GPL(hypercall_page);
>
> @@ -964,6 +965,11 @@ static u32 xen_apic_read(u32 reg)
>
> static void xen_apic_write(u32 reg, u32 val)
> {
> + if (reg == APIC_LVTPC) {
> + (void)pmu_apic_update(reg);
> + return;
> + }
> +
> /* Warn to see if there's any stray references */
> WARN_ON(1);
> }
> @@ -1068,6 +1074,17 @@ static inline void xen_write_cr8(unsigned long val)
> BUG_ON(val);
> }
> #endif
> +
> +static u64 xen_read_msr_safe(unsigned int msr, int *err)
> +{
> + u64 val;
> +
> + if (pmu_msr_read(msr, &val, err))
> + return val;
> +
> + return native_read_msr_safe(msr, err);
> +}
> +
> static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
> {
> int ret;
> @@ -1108,7 +1125,8 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
> break;
>
> default:
> - ret = native_write_msr_safe(msr, low, high);
> + if (!pmu_msr_write(msr, low, high, &ret))
> + ret = native_write_msr_safe(msr, low, high);
> }
>
> return ret;
> @@ -1244,11 +1262,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
>
> .wbinvd = native_wbinvd,
>
> - .read_msr = native_read_msr_safe,
> + .read_msr = xen_read_msr_safe,
> .write_msr = xen_write_msr_safe,
>
> .read_tsc = native_read_tsc,
> - .read_pmc = native_read_pmc,
> + .read_pmc = xen_read_pmc,
>
> .read_tscp = native_read_tscp,
>
> diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
> index f1bec27..f92d406 100644
> --- a/arch/x86/xen/pmu.c
> +++ b/arch/x86/xen/pmu.c
> @@ -160,6 +160,90 @@ static bool is_intel_pmu_msr(u32 msr_index, int *type, int *index)
> return false;
> }
>
> +bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
> +{
> +
> + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
> + if (is_amd_pmu_msr(msr)) {
> + *val = native_read_msr_safe(msr, err);
> + return true;
> + }
> + } else {
> + int type, index;
> + if (is_intel_pmu_msr(msr, &type, &index)) {
> + *val = native_read_msr_safe(msr, err);
> + return true;
> + }
> + }
> +
> + return false;
> +}
> +
> +bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
> +{
> + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
> + if (is_amd_pmu_msr(msr)) {
> + *err = native_write_msr_safe(msr, low, high);
> + return true;
> + }
> + } else {
> + int type, index;
> +
> + if (is_intel_pmu_msr(msr, &type, &index)) {
> + *err = native_write_msr_safe(msr, low, high);
> + return true;
> + }
> + }
> +
> + return false;
> +}
> +
> +unsigned long long xen_amd_read_pmc(int counter)
> +{
> + uint32_t msr;
> + int err;
> +
> + msr = amd_counters_base + (counter * amd_msr_step);
> + return native_read_msr_safe(msr, &err);
> +}
> +
> +unsigned long long xen_intel_read_pmc(int counter)
> +{
> + int err;
> + uint32_t msr;
> +
> + if (counter & (1<<30))
What means the 30? Should be a #define ...?
Dietmar.
> + msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
> + else
> + msr = MSR_IA32_PERFCTR0 + counter;
> +
> + return native_read_msr_safe(msr, &err);
> +}
> +
> +unsigned long long xen_read_pmc(int counter)
> +{
> + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
> + return xen_amd_read_pmc(counter);
> + else
> + return xen_intel_read_pmc(counter);
> +}
> +
> +int pmu_apic_update(uint32_t val)
> +{
> + int ret;
> + struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
> +
> + if (!xenpmu_data) {
> + WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
> + return -EINVAL;
> + }
> +
> + xenpmu_data->pmu.lapic_lvtpc = val;
> + ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);
> +
> + return ret;
> +}
> +
> /* perf callbacks*/
> int xen_is_in_guest(void)
> {
> diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h
> index d52e8db..30bfbcf 100644
> --- a/arch/x86/xen/pmu.h
> +++ b/arch/x86/xen/pmu.h
> @@ -7,5 +7,9 @@ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
> int xen_pmu_init(int cpu);
> void xen_pmu_finish(int cpu);
> bool is_xen_pmu(int cpu);
> +bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err);
> +bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err);
> +int pmu_apic_update(uint32_t reg);
> +unsigned long long xen_read_pmc(int counter);
>
> #endif /* __XEN_PMU_H */
> diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h
> index ed00245..79384e4 100644
> --- a/include/xen/interface/xenpmu.h
> +++ b/include/xen/interface/xenpmu.h
> @@ -13,6 +13,7 @@
> #define XENPMU_feature_set 3
> #define XENPMU_init 4
> #define XENPMU_finish 5
> +#define XENPMU_lvtpc_set 6
>
> /* Parameter structure for HYPERVISOR_xenpmu_op call */
> struct xen_pmu_params {
>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 5/6] xen/PMU: Intercept PMU-related MSR and APIC accesses
2014-06-12 6:56 ` Dietmar Hahn
@ 2014-06-12 14:50 ` Boris Ostrovsky
0 siblings, 0 replies; 30+ messages in thread
From: Boris Ostrovsky @ 2014-06-12 14:50 UTC (permalink / raw)
To: Dietmar Hahn; +Cc: kevin.tian, david.vrabel, JBeulich, xen-devel
On 06/12/2014 02:56 AM, Dietmar Hahn wrote:
>
> +
> +unsigned long long xen_intel_read_pmc(int counter)
> +{
> + int err;
> + uint32_t msr;
> +
> + if (counter & (1<<30))
> What means the 30? Should be a #define ...?
Bit 30 of the counter determines whether the counter is general-purpose
or fixed.
Even though this is used only once I suppose I could make a define for
it since there are a couple of other macros like that.
-boris
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH v2 6/6] xen/PMU: PMU emulation code
2014-06-06 17:44 [PATCH v2 0/6] xen/PMU: PMU support for Xen PV guests Boris Ostrovsky
` (4 preceding siblings ...)
2014-06-06 17:44 ` [PATCH v2 5/6] xen/PMU: Intercept PMU-related MSR and APIC accesses Boris Ostrovsky
@ 2014-06-06 17:44 ` Boris Ostrovsky
2014-06-10 13:57 ` [PATCH v2 0/6] xen/PMU: PMU support for Xen PV guests David Vrabel
6 siblings, 0 replies; 30+ messages in thread
From: Boris Ostrovsky @ 2014-06-06 17:44 UTC (permalink / raw)
To: david.vrabel, konrad.wilk
Cc: boris.ostrovsky, kevin.tian, dietmar.hahn, JBeulich, xen-devel
Add PMU emulation code that runs when we are processing a PMU interrupt. This code
will allow us not to trap to hypervisor on each MSR/LVTPC access (of which there
may be quite a few in the handler).
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
arch/x86/xen/pmu.c | 209 ++++++++++++++++++++++++++++++++++++-----
include/xen/interface/xenpmu.h | 1 +
2 files changed, 187 insertions(+), 23 deletions(-)
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index f92d406..e543629 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -13,11 +13,22 @@
/* x86_pmu.handle_irq definition */
#include <../kernel/cpu/perf_event.h>
+#define XENPMU_IRQ_PROCESSING 1
+struct xenpmu {
+ /* Shared page between hypervisor and domain */
+ struct xen_pmu_data *xenpmu_data;
-/* Shared page between hypervisor and domain */
-DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared);
-#define get_xenpmu_data() per_cpu(xenpmu_shared, smp_processor_id());
+ uint8_t flags;
+};
+DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);
+#define get_xenpmu_data() ((per_cpu(xenpmu_shared, \
+ smp_processor_id())).xenpmu_data)
+#define get_xenpmu_flags() ((per_cpu(xenpmu_shared, \
+ smp_processor_id())).flags)
+/* Macro for computing address of a PMU MSR bank */
+#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \
+ (uintptr_t)ctxt->field))
/* AMD PMU */
#define F15H_NUM_COUNTERS 6
@@ -160,18 +171,124 @@ static bool is_intel_pmu_msr(u32 msr_index, int *type, int *index)
return false;
}
-bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
+static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
+ int index, bool is_read)
+{
+ uint64_t *reg = NULL;
+ struct xen_pmu_intel_ctxt *ctxt;
+ uint64_t *fix_counters;
+ struct xen_pmu_cntr_pair *arch_cntr_pair;
+ struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+ uint8_t xenpmu_flags = get_xenpmu_flags();
+
+
+ if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
+ return false;
+
+ ctxt = &xenpmu_data->pmu.intel;
+
+ switch (msr) {
+ case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ reg = &ctxt->global_ovf_ctrl;
+ break;
+ case MSR_CORE_PERF_GLOBAL_STATUS:
+ reg = &ctxt->global_status;
+ break;
+ case MSR_CORE_PERF_GLOBAL_CTRL:
+ reg = &ctxt->global_ctrl;
+ break;
+ case MSR_CORE_PERF_FIXED_CTR_CTRL:
+ reg = &ctxt->fixed_ctrl;
+ break;
+ default:
+ switch (type) {
+ case MSR_TYPE_COUNTER:
+ fix_counters = field_offset(ctxt, fixed_counters);
+ reg = &fix_counters[index];
+ break;
+ case MSR_TYPE_ARCH_COUNTER:
+ arch_cntr_pair = field_offset(ctxt, arch_counters);
+ reg = &arch_cntr_pair[index].counter;
+ break;
+ case MSR_TYPE_ARCH_CTRL:
+ arch_cntr_pair = field_offset(ctxt, arch_counters);
+ reg = &arch_cntr_pair[index].control;
+ break;
+ default:
+ return false;
+ }
+ }
+
+ if (reg) {
+ if (is_read)
+ *val = *reg;
+ else {
+ *reg = *val;
+
+ if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL)
+ ctxt->global_status &= (~(*val));
+ }
+ return true;
+ }
+
+ return false;
+}
+
+static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
{
+ uint64_t *reg = NULL;
+ int i, off = 0;
+ struct xen_pmu_amd_ctxt *ctxt;
+ uint64_t *counter_regs, *ctrl_regs;
+ struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+ uint8_t xenpmu_flags = get_xenpmu_flags();
+
+ if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
+ return false;
+
+ if (k7_counters_mirrored &&
+ ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)))
+ msr = get_fam15h_addr(msr);
+ ctxt = &xenpmu_data->pmu.amd;
+ for (i = 0; i < amd_num_counters; i++) {
+ if (msr == amd_ctrls_base + off) {
+ ctrl_regs = field_offset(ctxt, ctrls);
+ reg = &ctrl_regs[i];
+ break;
+ } else if (msr == amd_counters_base + off) {
+ counter_regs = field_offset(ctxt, counters);
+ reg = &counter_regs[i];
+ break;
+ }
+ off += amd_msr_step;
+ }
+
+ if (reg) {
+ if (is_read)
+ *val = *reg;
+ else
+ *reg = *val;
+
+ return true;
+ }
+ return false;
+}
+
+bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
+{
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
if (is_amd_pmu_msr(msr)) {
- *val = native_read_msr_safe(msr, err);
+ if (!xen_amd_pmu_emulate(msr, val, 1))
+ *val = native_read_msr_safe(msr, err);
return true;
}
} else {
int type, index;
+
if (is_intel_pmu_msr(msr, &type, &index)) {
- *val = native_read_msr_safe(msr, err);
+ if (!xen_intel_pmu_emulate(msr, val, type, index, 1))
+ *val = native_read_msr_safe(msr, err);
return true;
}
}
@@ -181,16 +298,20 @@ bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
{
+ uint64_t val = ((uint64_t)high << 32) | low;
+
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
if (is_amd_pmu_msr(msr)) {
- *err = native_write_msr_safe(msr, low, high);
+ if (!xen_amd_pmu_emulate(msr, &val, 0))
+ *err = native_write_msr_safe(msr, low, high);
return true;
}
} else {
int type, index;
if (is_intel_pmu_msr(msr, &type, &index)) {
- *err = native_write_msr_safe(msr, low, high);
+ if (!xen_intel_pmu_emulate(msr, &val, type, index, 0))
+ *err = native_write_msr_safe(msr, low, high);
return true;
}
}
@@ -200,24 +321,52 @@ bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
unsigned long long xen_amd_read_pmc(int counter)
{
- uint32_t msr;
- int err;
+ struct xen_pmu_amd_ctxt *ctxt;
+ uint64_t *counter_regs;
+ struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+ uint8_t xenpmu_flags = get_xenpmu_flags();
+
+ if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
+ uint32_t msr;
+ int err;
+
+ msr = amd_counters_base + (counter * amd_msr_step);
+ return native_read_msr_safe(msr, &err);
+ }
- msr = amd_counters_base + (counter * amd_msr_step);
- return native_read_msr_safe(msr, &err);
+ ctxt = &xenpmu_data->pmu.amd;
+ counter_regs = field_offset(ctxt, counters);
+ return counter_regs[counter];
}
unsigned long long xen_intel_read_pmc(int counter)
{
- int err;
- uint32_t msr;
+ struct xen_pmu_intel_ctxt *ctxt;
+ uint64_t *fixed_counters;
+ struct xen_pmu_cntr_pair *arch_cntr_pair;
+ struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+ uint8_t xenpmu_flags = get_xenpmu_flags();
- if (counter & (1<<30))
- msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
- else
- msr = MSR_IA32_PERFCTR0 + counter;
+ if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
+ uint32_t msr;
+ int err;
+
+ if (counter & (1<<30))
+ msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
+ else
+ msr = MSR_IA32_PERFCTR0 + counter;
- return native_read_msr_safe(msr, &err);
+ return native_read_msr_safe(msr, &err);
+ }
+
+ ctxt = &xenpmu_data->pmu.intel;
+ if (counter & (1<<30)) {
+ fixed_counters = field_offset(ctxt, fixed_counters);
+ return fixed_counters[counter & 0xffff];
+ } else {
+ arch_cntr_pair = field_offset(ctxt, arch_counters);
+ return arch_cntr_pair[counter].counter;
+ }
}
unsigned long long xen_read_pmc(int counter)
@@ -302,24 +451,37 @@ static void xen_convert_regs(struct cpu_user_regs *xen_regs,
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
int ret = IRQ_NONE;
+ int err;
struct pt_regs regs;
struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+ uint8_t xenpmu_flags = get_xenpmu_flags();
if (!xenpmu_data) {
WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
return ret;
}
+ per_cpu(xenpmu_shared, smp_processor_id()).flags =
+ xenpmu_flags | XENPMU_IRQ_PROCESSING;
+
xen_convert_regs(&xenpmu_data->pmu.regs, ®s);
if (x86_pmu.handle_irq(®s))
ret = IRQ_HANDLED;
+ /* Write out cached context to HW */
+ err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
+ per_cpu(xenpmu_shared, smp_processor_id()).flags = xenpmu_flags;
+ if (err) {
+ WARN_ONCE(1, "%s failed hypercall, err: %d\n", __func__, err);
+ return IRQ_NONE;
+ }
+
return ret;
}
bool is_xen_pmu(int cpu)
{
- return (per_cpu(xenpmu_shared, cpu) != NULL);
+ return (get_xenpmu_data() != NULL);
}
int xen_pmu_init(int cpu)
@@ -345,7 +507,8 @@ int xen_pmu_init(int cpu)
if (ret)
goto fail;
- per_cpu(xenpmu_shared, cpu) = xenpmu_data;
+ per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
+ per_cpu(xenpmu_shared, cpu).flags = 0;
if (cpu == 0) {
perf_register_guest_info_callbacks(&xen_guest_cbs);
@@ -369,6 +532,6 @@ void xen_pmu_finish(int cpu)
(void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);
- vfree(per_cpu(xenpmu_shared, cpu));
- per_cpu(xenpmu_shared, cpu) = NULL;
+ vfree(per_cpu(xenpmu_shared, cpu).xenpmu_data);
+ per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL;
}
diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h
index 79384e4..ec90673 100644
--- a/include/xen/interface/xenpmu.h
+++ b/include/xen/interface/xenpmu.h
@@ -14,6 +14,7 @@
#define XENPMU_init 4
#define XENPMU_finish 5
#define XENPMU_lvtpc_set 6
+#define XENPMU_flush 7 /* Write cached MSR values to HW */
/* Parameter structure for HYPERVISOR_xenpmu_op call */
struct xen_pmu_params {
--
1.8.1.4
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH v2 0/6] xen/PMU: PMU support for Xen PV guests
2014-06-06 17:44 [PATCH v2 0/6] xen/PMU: PMU support for Xen PV guests Boris Ostrovsky
` (5 preceding siblings ...)
2014-06-06 17:44 ` [PATCH v2 6/6] xen/PMU: PMU emulation code Boris Ostrovsky
@ 2014-06-10 13:57 ` David Vrabel
2014-06-10 15:27 ` Boris Ostrovsky
6 siblings, 1 reply; 30+ messages in thread
From: David Vrabel @ 2014-06-10 13:57 UTC (permalink / raw)
To: Boris Ostrovsky, konrad.wilk
Cc: kevin.tian, dietmar.hahn, JBeulich, xen-devel
On 06/06/14 18:44, Boris Ostrovsky wrote:
> (This is the second version of the series. The first one was posted long
> time ago and there are too many changes to list.)
>
> This is the Linux side of Xen PMU support for PV(H) guests, including
> dom0. Only kernel changes are here, toolstack patch will be provided
> separately.
>
> Here is description from the hypervisor patch submission that applies
> to this series as well:
>
> This version has following limitations:
> * For accurate profiling of dom0/Xen dom0 VCPUs should be pinned.
> * Hypervisor code is only profiled on processors that have running dom0 VCPUs
> on them.
> * No backtrace support.
I think there needs to be a plausible plan for how to resolve these
limitations.
David
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 0/6] xen/PMU: PMU support for Xen PV guests
2014-06-10 13:57 ` [PATCH v2 0/6] xen/PMU: PMU support for Xen PV guests David Vrabel
@ 2014-06-10 15:27 ` Boris Ostrovsky
0 siblings, 0 replies; 30+ messages in thread
From: Boris Ostrovsky @ 2014-06-10 15:27 UTC (permalink / raw)
To: David Vrabel; +Cc: kevin.tian, xen-devel, dietmar.hahn, JBeulich
On 06/10/2014 09:57 AM, David Vrabel wrote:
> On 06/06/14 18:44, Boris Ostrovsky wrote:
>> (This is the second version of the series. The first one was posted long
>> time ago and there are too many changes to list.)
>>
>> This is the Linux side of Xen PMU support for PV(H) guests, including
>> dom0. Only kernel changes are here, toolstack patch will be provided
>> separately.
>>
>> Here is description from the hypervisor patch submission that applies
>> to this series as well:
>>
>> This version has following limitations:
>> * For accurate profiling of dom0/Xen dom0 VCPUs should be pinned.
>> * Hypervisor code is only profiled on processors that have running dom0 VCPUs
>> on them.
>> * No backtrace support.
> I think there needs to be a plausible plan for how to resolve these
> limitations.
Come think of it, pinning may not really be a requirement. I suspect a
migrating vcpu will have effect similar to what happens when a process
migrates to a different processor in bare-metal environment. We may need
to add PCPU to report (and there is a reserved u32 filed in
perf_sample_data that I may be able to use).
Backtraces should not be too big a deal (in theory). I in fact had a
prototype at some point.
The second bullet is the most challenging, mostly because it may require
changes to perf kernel code. I had a couple of ideas but they are rather
half-baked. One solution could be to profile only N PCPUs at a time
(where N is the number of dom0's VCPUs) and rotate among all PCPUs.
-boris
^ permalink raw reply [flat|nested] 30+ messages in thread