All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] [PATCH] perf: Attaching an event to a specific PMU
@ 2011-07-03 15:04 Robert Richter
  2011-07-03 18:04 ` Peter Zijlstra
  0 siblings, 1 reply; 18+ messages in thread
From: Robert Richter @ 2011-07-03 15:04 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo, linux-kernel

Peter,

this is a prototype implementation for attaching an event to a
specific PMU. If there is a general acceptance for this approach I
will create patches for upstream integration and base my current IBS
patches on it.

-Robert


This patch creates device nodes for each pmu using udev:

 # ls -l /dev/pmu/
 total 0
 crw-rw---- 1 root root 254, 5 Jul  8  2011 breakpoint
 crw-rw---- 1 root root 254, 4 Jul  8  2011 cpu
 crw-rw---- 1 root root 254, 6 Jul  8  2011 proto
 crw-rw---- 1 root root 254, 1 Jul  8  2011 software
 crw-rw---- 1 root root 254, 2 Jul  8  2011 tracepoint

After opening a device the pmu's file descriptor can be used to attach
an event to it. This works same as attaching an event to a specific
group:

        pmu = open("/dev/pmu/proto", O_RDONLY);
        ...
        event = sys_perf_event_open(&attr, 0, -1, pmu, 0);

This patch includes a working example that attaches an event to the
PMU registered with the name 'proto':

 # ls -l /dev/pmu/proto
 crw-rw---- 1 root root 254, 6 Jul  8  2011 /dev/pmu/proto
 # dmesg -c > /dev/null
 # ./proto
 # dmesg -c
 Found event ffff88041de71c00 (config=0000000000f00ba2) for pmu proto (type=6) on cpu -1
 Adding event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1
 Removing event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1
 Adding event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1
 Removing event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1

Building the example:

 $ cd linux         # Linux kernel source dir
 $ make -C tools/perf/Documentation/examples CFLAGS=-I../.. proto

This approach works for fixed pmu types and also for dynamically
allocated pmus.

I intend to use this event allocation method to implement AMD
IBS. Other pmus can be implemented similar, such as northbridge and/or
uncore events for x86. The implementation is generic and not limited
to a single architecture, it is useful in every system with multiple
pmus.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 include/linux/perf_event.h                |    1 +
 kernel/events/core.c                      |  179 ++++++++++++++++++++++++++---
 tools/perf/Documentation/examples/proto.c |   51 ++++++++
 3 files changed, 213 insertions(+), 18 deletions(-)
 create mode 100644 tools/perf/Documentation/examples/proto.c

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e76a410..3c5452e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -602,6 +602,7 @@ struct pmu {
 	struct list_head		entry;
 
 	struct device			*dev;
+	struct device			*cldev;
 	char				*name;
 	int				type;
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5e70f62..967203c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4,7 +4,8 @@
  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
  *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
  *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
- *  Copyright  �  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *  Copyright (C) 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *  Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
  *
  * For licensing details see kernel-base/COPYING
  */
@@ -35,6 +36,7 @@
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/cdev.h>
 
 #include "internal.h"
 
@@ -5510,42 +5512,68 @@ static struct device_attribute pmu_dev_attrs[] = {
        __ATTR_NULL,
 };
 
-static int pmu_bus_running;
-static struct bus_type pmu_bus = {
-	.name		= "event_source",
-	.dev_attrs	= pmu_dev_attrs,
+static struct pmu_sysfs {
+	int		initialized;
+	struct bus_type	bus;
+	struct cdev	*cdev;
+	unsigned	major;
+	struct class	*class;
+} pmu_sysfs = {
+	.bus = {
+		.name		= "event_source",
+		.dev_attrs	= pmu_dev_attrs,
+	},
 };
 
 static void pmu_dev_release(struct device *dev)
 {
+	struct pmu *pmu = dev_get_drvdata(dev);
+	if (pmu->cldev)
+		device_unregister(pmu->cldev);
 	kfree(dev);
 }
 
+#define MINORMAX	(MINORMASK + 1)
+
 static int pmu_dev_alloc(struct pmu *pmu)
 {
 	int ret = -ENOMEM;
+	struct device *dev;
+	struct device *cldev = NULL;
 
-	pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL);
-	if (!pmu->dev)
+	dev = kzalloc(sizeof(struct device), GFP_KERNEL);
+	if (!dev)
 		goto out;
 
-	device_initialize(pmu->dev);
-	ret = dev_set_name(pmu->dev, "%s", pmu->name);
+	device_initialize(dev);
+	ret = dev_set_name(dev, "%s", pmu->name);
 	if (ret)
 		goto free_dev;
 
-	dev_set_drvdata(pmu->dev, pmu);
-	pmu->dev->bus = &pmu_bus;
-	pmu->dev->release = pmu_dev_release;
-	ret = device_add(pmu->dev);
+	dev_set_drvdata(dev, pmu);
+	dev->bus = &pmu_sysfs.bus;
+	dev->release = pmu_dev_release;
+	ret = device_add(dev);
 	if (ret)
 		goto free_dev;
 
+	if (pmu_sysfs.class && pmu_sysfs.major && pmu->type < MINORMAX) {
+		cldev = device_create(pmu_sysfs.class, dev,
+				      MKDEV(pmu_sysfs.major, pmu->type),
+				      NULL, "%s", pmu->name);
+		if (IS_ERR(cldev)) {
+			ret = PTR_ERR(cldev);
+			goto free_dev;
+		}
+	}
+
+	pmu->dev = dev;
+	pmu->cldev = cldev;
 out:
 	return ret;
 
 free_dev:
-	put_device(pmu->dev);
+	put_device(dev);
 	goto out;
 }
 
@@ -5580,7 +5608,7 @@ int perf_pmu_register(struct pmu *pmu, char *name, int type)
 	}
 	pmu->type = type;
 
-	if (pmu_bus_running) {
+	if (pmu_sysfs.initialized) {
 		ret = pmu_dev_alloc(pmu);
 		if (ret)
 			goto free_idr;
@@ -5967,6 +5995,38 @@ out:
 	return ret;
 }
 
+static int perf_pmu_open(struct inode *inode, struct file *file)
+{
+	/* minor number is the pmu->type */
+	file->private_data = (void *)(unsigned long)iminor(inode);
+	return 0;
+}
+
+static const struct file_operations perf_pmu_fops = {
+	.owner		= THIS_MODULE,
+	.open		= perf_pmu_open,
+};
+
+static int perf_set_pmu_type(int *type, int fd)
+{
+	struct file *file;
+	int fput_needed;
+	int ret = -EBADF;
+
+	file = fget_light(fd, &fput_needed);
+	if (!file)
+		return ret;
+
+	if (file->f_op == &perf_pmu_fops) {
+		*type = (int)(unsigned long)file->private_data;
+		ret = 0;
+	}
+
+	fput_light(file, fput_needed);
+
+	return ret;
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
@@ -6023,7 +6083,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	if (event_fd < 0)
 		return event_fd;
 
-	if (group_fd != -1) {
+	if (perf_set_pmu_type(&attr.type, group_fd) && group_fd != -1) {
 		group_leader = perf_fget_light(group_fd, &fput_needed);
 		if (IS_ERR(group_leader)) {
 			err = PTR_ERR(group_leader);
@@ -6885,6 +6945,36 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 	return NOTIFY_OK;
 }
 
+static struct pmu perf_proto;
+
+static int perf_proto_init(struct perf_event *event)
+{
+	if (perf_proto.type != event->attr.type)
+		return -ENOENT;
+	pr_info("Found event %p (config=%016llx) for pmu %s (type=%d) on cpu %d\n",
+		event, event->attr.config, perf_proto.name, event->attr.type, event->oncpu);
+	return 0;
+}
+
+static int perf_proto_add(struct perf_event *event, int flags)
+{
+	pr_info("Adding event %p (config=%016llx) to pmu %s (type=%d) on cpu %d\n",
+		event, event->attr.config, perf_proto.name, event->attr.type, event->oncpu);
+	return 0;
+}
+
+static void perf_proto_del(struct perf_event *event, int flags)
+{
+	pr_info("Removing event %p (config=%016llx) to pmu %s (type=%d) on cpu %d\n",
+		event, event->attr.config, perf_proto.name, event->attr.type, event->oncpu);
+}
+
+static struct pmu perf_proto = {
+	.event_init	= perf_proto_init,
+	.add		= perf_proto_add,
+	.del		= perf_proto_del,
+};
+
 void __init perf_event_init(void)
 {
 	int ret;
@@ -6896,6 +6986,7 @@ void __init perf_event_init(void)
 	perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
 	perf_pmu_register(&perf_cpu_clock, NULL, -1);
 	perf_pmu_register(&perf_task_clock, NULL, -1);
+	perf_pmu_register(&perf_proto, "proto", -1);
 	perf_tp_register();
 	perf_cpu_notifier(perf_cpu_notify);
 	register_reboot_notifier(&perf_reboot_notifier);
@@ -6904,6 +6995,55 @@ void __init perf_event_init(void)
 	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 }
 
+static char *pmu_devnode(struct device *dev, mode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "%s/%s", dev->class->name, dev_name(dev));
+}
+
+static int __init perf_event_chrdev_init(void)
+{
+	static const char name[] = "pmu";
+	int ret = -ENOMEM;
+	struct cdev *cdev;
+	dev_t devt;
+	struct class *class;
+
+	cdev = cdev_alloc();
+	if (!cdev)
+		goto out;
+
+	ret = alloc_chrdev_region(&devt, 0, MINORMAX, name);
+	if (ret)
+		goto out1;
+
+	cdev->owner = THIS_MODULE;
+	cdev->ops = &perf_pmu_fops;
+	kobject_set_name(&cdev->kobj, "%s", name);
+	ret = cdev_add(cdev, devt, MINORMAX);
+	if (ret)
+		goto out2;
+
+	class = class_create(THIS_MODULE, name);
+	if (IS_ERR(class)) {
+		ret = PTR_ERR(class);
+		goto out3;
+	}
+	class->devnode = pmu_devnode;
+
+	pmu_sysfs.class = class;
+	pmu_sysfs.cdev = cdev;
+	pmu_sysfs.major = MAJOR(devt);
+out:
+	return ret;
+out3:
+	cdev_del(cdev);
+out2:
+	unregister_chrdev_region(devt, MINORMAX);
+out1:
+	kobject_put(&cdev->kobj);
+	goto out;
+}
+
 static int __init perf_event_sysfs_init(void)
 {
 	struct pmu *pmu;
@@ -6911,7 +7051,10 @@ static int __init perf_event_sysfs_init(void)
 
 	mutex_lock(&pmus_lock);
 
-	ret = bus_register(&pmu_bus);
+	ret = perf_event_chrdev_init();
+	WARN(ret, "Unable to create pmu char device, reason %d\n", ret);
+
+	ret = bus_register(&pmu_sysfs.bus);
 	if (ret)
 		goto unlock;
 
@@ -6922,7 +7065,7 @@ static int __init perf_event_sysfs_init(void)
 		ret = pmu_dev_alloc(pmu);
 		WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret);
 	}
-	pmu_bus_running = 1;
+	pmu_sysfs.initialized = 1;
 	ret = 0;
 
 unlock:
diff --git a/tools/perf/Documentation/examples/proto.c b/tools/perf/Documentation/examples/proto.c
new file mode 100644
index 0000000..967260f
--- /dev/null
+++ b/tools/perf/Documentation/examples/proto.c
@@ -0,0 +1,51 @@
+/*
+ * Prototype to attach an event to a specific PMU
+ *
+ *  Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
+ *
+ * Sample code that attaches an event to a specified PMU.
+ *
+ *  # ls -l /dev/pmu/proto
+ *  crw-rw---- 1 root root 254, 6 Jul  8  2011 /dev/pmu/proto
+ *  # dmesg -c > /dev/null
+ *  # ./proto
+ *  # dmesg -c
+ *  Found event ffff88041de71c00 (config=0000000000f00ba2) for pmu proto (type=6) on cpu -1
+ *  Adding event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1
+ *  Removing event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1
+ *  Adding event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1
+ *  Removing event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1
+ *
+ * Building:
+ *
+ *  $ cd linux         # Linux kernel source dir
+ *  $ make -C tools/perf/Documentation/examples CFLAGS=-I../.. proto
+ */
+
+#include <fcntl.h>
+#include <err.h>
+
+#include "perf.h"
+
+int main (int argc, char *argv[])
+{
+	int pmu, event;
+	struct perf_event_attr attr = { 0 };
+
+	pmu = open("/dev/pmu/proto", O_RDONLY);
+	if (pmu == -1)
+		err(1, "pmu not found");
+
+	attr.config = 0xf00ba2;
+
+	event = sys_perf_event_open(&attr, 0, -1, pmu, 0);
+	if (event == -1) {
+		close(pmu);
+		err(1, "event creation failed");
+	}
+
+	close(event);
+	close(pmu);
+
+	exit(0);
+}
-- 
1.7.5.3


-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [RFC] [PATCH] perf: Attaching an event to a specific PMU
  2011-07-03 15:04 [RFC] [PATCH] perf: Attaching an event to a specific PMU Robert Richter
@ 2011-07-03 18:04 ` Peter Zijlstra
  2011-07-04 17:59   ` Robert Richter
  0 siblings, 1 reply; 18+ messages in thread
From: Peter Zijlstra @ 2011-07-03 18:04 UTC (permalink / raw)
  To: Robert Richter
  Cc: Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo, linux-kernel

On Sun, 2011-07-03 at 17:04 +0200, Robert Richter wrote:
> 
> this is a prototype implementation for attaching an event to a
> specific PMU. If there is a general acceptance for this approach I
> will create patches for upstream integration and base my current IBS
> patches on it. 

But there's already an infrastructure to do this, simply stick the
contents of /sys/bus/event_source/devices/*/type in
perf_event_attr::type and you're done.

I don't see the need for /dev crap.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] [PATCH] perf: Attaching an event to a specific PMU
  2011-07-03 18:04 ` Peter Zijlstra
@ 2011-07-04 17:59   ` Robert Richter
  2011-07-05  8:51     ` Peter Zijlstra
                       ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Robert Richter @ 2011-07-04 17:59 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo, linux-kernel

On 03.07.11 14:04:31, Peter Zijlstra wrote:
> On Sun, 2011-07-03 at 17:04 +0200, Robert Richter wrote:
> > 
> > this is a prototype implementation for attaching an event to a
> > specific PMU. If there is a general acceptance for this approach I
> > will create patches for upstream integration and base my current IBS
> > patches on it. 
> 
> But there's already an infrastructure to do this, simply stick the
> contents of /sys/bus/event_source/devices/*/type in
> perf_event_attr::type and you're done.
> 
> I don't see the need for /dev crap.

Using sysfs by reading from /sys/bus/event_source/devices/*/type would
work too. But there are several reasons why I like my approach.

First of all, it follows the idea of grouping events. Attaching events
to a specific pmu is not different from attaching them to a specific
event group. It is actually the same if we think of one group for
events that may be scheduled on only one pmu. Thus, treating a pmu
like a group event is the logical step for intuitive usage of the
perf_open syscall. This way we have symmetrical implementations for
binding events to groups or pmus.

The syscall interface has already everything needed for this.
Following the concept of attaching events to a group with a file
descriptor, we simply must create a file descriptor pointing to a pmu
device. This works without extending the perf_open syscall interface,
no changes to the interface are needed.

Device nodes are the general approach for controlling devices from
user-space, they are integral part of the Linux device driver model.
With a device file descriptor opened from a device node we can
explicitly point to a pmu device.

Representing a device with a device node is common programming
practice. Usage of device nodes is not deprecated. There are existing
frameworks to easily create such devices. With dynamically device node
allocation and udev there are solutions for drawbacks of /dev. Why not
having a device node for pmus? What are your concerns using /dev?

The patch introduces a pmu device class and devices are grouped by
class now. They can be found in /dev/pmu/* instead of searching for
device attributes somewhere in the system hirarchie at
/sys/bus/event_source/devices/*/type. A device node is easier to
handle.

The implementation only needs about 150 lines of kernel code. It is
straight and separated. There is nothing special what makes it hard to
read or maintain. The code is using typical kernel device allocation
methods. Do you think this patch makes kernel code too complex?

There is much easier user-space code:

       pmu = open("/dev/pmu/proto", O_RDONLY);
       if (pmu == -1)
               err(1, "pmu not found");

       attr.config = 0xf00ba2;

       event = sys_perf_event_open(&attr, 0, -1, pmu, 0);

vs.

        pmu = open("/sys/bus/event_source/devices/proto/type", O_RDONLY);
        if (pmu == -1)
                err(1, "pmu not found");
        size = read(pmu, buf, BUFSIZ - 1);
        if (size < 0) {
                close(pmu);
                err(1, "failed to read pmu type");
        }
        buf[size] = '0';

        attr.type = atoi(buf);
        attr.config = 0xf00ba2;

       event = sys_perf_event_open(&attr, 0, -1, -1, 0);

There are also additional header includes and variable declarations
needed.

(Btw, current kernel code does not support dynamically allocated pmu
types due to a check in perf_copy_attr():

        if (attr->type >= PERF_TYPE_MAX)
                return -EINVAL;
)

Beside of that, using /sys/ is racy. There is no protection against
unregistering the pmu. Probably this might not cause big problems in
practice, but it can be done better. With open/close we can protect
the pmu from being removed.

Overall, my approach improves the perf design. It adds a better and
more intuitve access to perf from user space with clear and common
methods and interfaces. Please let me know the concerns you have.

Thanks,

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] [PATCH] perf: Attaching an event to a specific PMU
  2011-07-04 17:59   ` Robert Richter
@ 2011-07-05  8:51     ` Peter Zijlstra
  2011-07-05  9:12       ` Ingo Molnar
  2011-07-05  9:47     ` [PATCH] perf: Extend attr check to allow also dynamically generated Robert Richter
  2011-07-05 10:53     ` [PATCH] perf: Extend attr check to allow also dynamically generated types Robert Richter
  2 siblings, 1 reply; 18+ messages in thread
From: Peter Zijlstra @ 2011-07-05  8:51 UTC (permalink / raw)
  To: Robert Richter
  Cc: Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo, linux-kernel

On Mon, 2011-07-04 at 19:59 +0200, Robert Richter wrote:

> First of all, it follows the idea of grouping events. Attaching events
> to a specific pmu is not different from attaching them to a specific
> event group. It is actually the same if we think of one group for
> events that may be scheduled on only one pmu. Thus, treating a pmu
> like a group event is the logical step for intuitive usage of the
> perf_open syscall. This way we have symmetrical implementations for
> binding events to groups or pmus.

That's not a good analogy. Grouping is about events being together, not
about events being on a particular pmu.

> Device nodes are the general approach for controlling devices from
> user-space, they are integral part of the Linux device driver model.
> With a device file descriptor opened from a device node we can
> explicitly point to a pmu device.

Yeah, but we already have a /sys interface, so this ship has sailed.

> Representing a device with a device node is common programming
> practice. Usage of device nodes is not deprecated. There are existing
> frameworks to easily create such devices. With dynamically device node
> allocation and udev there are solutions for drawbacks of /dev. Why not
> having a device node for pmus? What are your concerns using /dev?

Its impossible to represent events using that /dev interface,
furthermore we already have a /sys interface, so this is pure
duplication of a 

> The implementation only needs about 150 lines of kernel code. It is
> straight and separated. There is nothing special what makes it hard to
> read or maintain. The code is using typical kernel device allocation
> methods. Do you think this patch makes kernel code too complex?

It adds a redundant interface.

> Beside of that, using /sys/ is racy. There is no protection against
> unregistering the pmu. Probably this might not cause big problems in
> practice, but it can be done better. With open/close we can protect
> the pmu from being removed.

Why can't the open/close of the sysfs file provide the same? It just
means you have to close after sys_perf_event_open()

> Overall, my approach improves the perf design. It adds a better and
> more intuitve access to perf from user space with clear and common
> methods and interfaces. Please let me know the concerns you have.

Its redundant, this interface ship has sailed, its not going to happen.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] [PATCH] perf: Attaching an event to a specific PMU
  2011-07-05  8:51     ` Peter Zijlstra
@ 2011-07-05  9:12       ` Ingo Molnar
  2011-07-06 16:53         ` Robert Richter
  0 siblings, 1 reply; 18+ messages in thread
From: Ingo Molnar @ 2011-07-05  9:12 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Robert Richter, Paul Mackerras, Arnaldo Carvalho de Melo, linux-kernel


* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:

> > Overall, my approach improves the perf design. It adds a better 
> > and more intuitve access to perf from user space with clear and 
> > common methods and interfaces. Please let me know the concerns 
> > you have.
> 
> Its redundant, this interface ship has sailed, its not going to 
> happen.

Even if we had the choice, i don't see how a /dev based enumeration 
of PMUs is in any way better than a topologically attached set of 
PMUs in /sys.

This kind of structure is nice in principle:

 # ls -l /dev/pmu/
 total 0
 crw-rw---- 1 root root 254, 5 Jul  8  2011 breakpoint
 crw-rw---- 1 root root 254, 4 Jul  8  2011 cpu
 crw-rw---- 1 root root 254, 6 Jul  8  2011 proto
 crw-rw---- 1 root root 254, 1 Jul  8  2011 software
 crw-rw---- 1 root root 254, 2 Jul  8  2011 tracepoint

But it should be done in /sys/. That way for example the graphics 
card:

  00:02.0 VGA compatible controller

should have its events under:

  /sys/devices/pci0000\:00/0000\:00\:02.0/events/

breakpoints could be listed in:

  /sys/devices/system/cpu/cpu0/events/

core kernel software events could be listed in:

  /sys/devices/system/core/events/

tracepoints could be listed in their respective subsystem directories:

  /sys/devices/system/timekeeping/events/    # timer tracepoints
  /sys/devices/system/machinecheck/events/   # the MCE tracepoint

  /sys/kernel/slab/events/                   # SLAB tracepoints

  /sys/kernel/debug/tracing/events/          # the old, legacy location of
                                             # tracepoint events

there should also be a /sys/class/events/ populated with symlinks to 
all their locations.

Then we could transition all events to their respective sysfs 
locations, where they would be easily discoverable and enumerable.

The events would also automatically move (and with time, be 
automatically added) when a new device is added to sysfs.

Peter, there was a patch (from you?) that started doing this kind of 
sysfs enumeration with a class added as well - do you still have it 
or do you have a link to it?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] perf: Extend attr check to allow also dynamically generated
  2011-07-04 17:59   ` Robert Richter
  2011-07-05  8:51     ` Peter Zijlstra
@ 2011-07-05  9:47     ` Robert Richter
  2011-07-05 10:51       ` Peter Zijlstra
  2011-07-05 10:53     ` [PATCH] perf: Extend attr check to allow also dynamically generated types Robert Richter
  2 siblings, 1 reply; 18+ messages in thread
From: Robert Richter @ 2011-07-05  9:47 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo, linux-kernel

On 04.07.11 19:59:27, Robert Richter wrote:
> (Btw, current kernel code does not support dynamically allocated pmu
> types due to a check in perf_copy_attr():
> 
>         if (attr->type >= PERF_TYPE_MAX)
>                 return -EINVAL;
> )

Below a fix for this.

-Robert


>From 63e76c3d827e3d6d24ff4ee24854523c054c7179 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Tue, 5 Jul 2011 11:04:39 +0200
Subject: [PATCH] perf: Extend attr check to allow also dynamically generated
 types

When attaching events to a pmu with generated type, the initialization
fails. Extending the check to allow such types.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 kernel/events/core.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index cca3588..5900729 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5958,7 +5958,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	 * If the type exists, the corresponding creation will verify
 	 * the attr->config.
 	 */
-	if (attr->type >= PERF_TYPE_MAX)
+	if (attr->type >= PERF_TYPE_MAX && !idr_find(&pmu_idr, attr->type))
 		return -EINVAL;
 
 	if (attr->__reserved_1)
-- 
1.7.5.3




-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] perf: Extend attr check to allow also dynamically generated
  2011-07-05  9:47     ` [PATCH] perf: Extend attr check to allow also dynamically generated Robert Richter
@ 2011-07-05 10:51       ` Peter Zijlstra
  2011-07-05 10:56         ` Robert Richter
  0 siblings, 1 reply; 18+ messages in thread
From: Peter Zijlstra @ 2011-07-05 10:51 UTC (permalink / raw)
  To: Robert Richter
  Cc: Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo, linux-kernel

On Tue, 2011-07-05 at 11:47 +0200, Robert Richter wrote:

> From 63e76c3d827e3d6d24ff4ee24854523c054c7179 Mon Sep 17 00:00:00 2001
> From: Robert Richter <robert.richter@amd.com>
> Date: Tue, 5 Jul 2011 11:04:39 +0200
> Subject: [PATCH] perf: Extend attr check to allow also dynamically generated
>  types
> 
> When attaching events to a pmu with generated type, the initialization
> fails. Extending the check to allow such types.
> 
> Signed-off-by: Robert Richter <robert.richter@amd.com>
> ---
>  kernel/events/core.c |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index cca3588..5900729 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -5958,7 +5958,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
>  	 * If the type exists, the corresponding creation will verify
>  	 * the attr->config.
>  	 */
> -	if (attr->type >= PERF_TYPE_MAX)
> +	if (attr->type >= PERF_TYPE_MAX && !idr_find(&pmu_idr, attr->type))
>  		return -EINVAL;
>  
>  	if (attr->__reserved_1)

Lin Ming submitted a similar patch:

 http://marc.info/?l=linux-kernel&m=130942109729864

Which I prefer as it removes code. Event creation will fail in
perf_init_event() in that case when an invalid type is specified.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] perf: Extend attr check to allow also dynamically generated types
  2011-07-04 17:59   ` Robert Richter
  2011-07-05  8:51     ` Peter Zijlstra
  2011-07-05  9:47     ` [PATCH] perf: Extend attr check to allow also dynamically generated Robert Richter
@ 2011-07-05 10:53     ` Robert Richter
  2 siblings, 0 replies; 18+ messages in thread
From: Robert Richter @ 2011-07-05 10:53 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo, linux-kernel

(resent with fixed subject)

On 04.07.11 19:59:27, Robert Richter wrote:
> (Btw, current kernel code does not support dynamically allocated pmu
> types due to a check in perf_copy_attr():
> 
>         if (attr->type >= PERF_TYPE_MAX)
>                 return -EINVAL;
> )

Below a fix for this.

-Robert


>From 63e76c3d827e3d6d24ff4ee24854523c054c7179 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Tue, 5 Jul 2011 11:04:39 +0200
Subject: [PATCH] perf: Extend attr check to allow also dynamically generated
 types

When attaching events to a pmu with generated type, the initialization
fails. Extending the check to allow such types.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 kernel/events/core.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index cca3588..5900729 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5958,7 +5958,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	 * If the type exists, the corresponding creation will verify
 	 * the attr->config.
 	 */
-	if (attr->type >= PERF_TYPE_MAX)
+	if (attr->type >= PERF_TYPE_MAX && !idr_find(&pmu_idr, attr->type))
 		return -EINVAL;
 
 	if (attr->__reserved_1)
-- 
1.7.5.3




-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] perf: Extend attr check to allow also dynamically generated
  2011-07-05 10:51       ` Peter Zijlstra
@ 2011-07-05 10:56         ` Robert Richter
  0 siblings, 0 replies; 18+ messages in thread
From: Robert Richter @ 2011-07-05 10:56 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo, linux-kernel

On 05.07.11 06:51:00, Peter Zijlstra wrote:
> Lin Ming submitted a similar patch:
> 
>  http://marc.info/?l=linux-kernel&m=130942109729864
> 
> Which I prefer as it removes code. Event creation will fail in
> perf_init_event() in that case when an invalid type is specified.

Thanks for the pointer, ignore my resent patch.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] [PATCH] perf: Attaching an event to a specific PMU
  2011-07-05  9:12       ` Ingo Molnar
@ 2011-07-06 16:53         ` Robert Richter
  2011-07-06 17:10           ` Ingo Molnar
  2011-07-06 17:12           ` Peter Zijlstra
  0 siblings, 2 replies; 18+ messages in thread
From: Robert Richter @ 2011-07-06 16:53 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, Paul Mackerras, Arnaldo Carvalho de Melo, linux-kernel

On 05.07.11 05:12:52, Ingo Molnar wrote:
> 
> * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> 
> > > Overall, my approach improves the perf design. It adds a better 
> > > and more intuitve access to perf from user space with clear and 
> > > common methods and interfaces. Please let me know the concerns 
> > > you have.
> > 
> > Its redundant, this interface ship has sailed, its not going to 
> > happen.
> 
> Even if we had the choice, i don't see how a /dev based enumeration 
> of PMUs is in any way better than a topologically attached set of 
> PMUs in /sys.
> 
> This kind of structure is nice in principle:
> 
>  # ls -l /dev/pmu/
>  total 0
>  crw-rw---- 1 root root 254, 5 Jul  8  2011 breakpoint
>  crw-rw---- 1 root root 254, 4 Jul  8  2011 cpu
>  crw-rw---- 1 root root 254, 6 Jul  8  2011 proto
>  crw-rw---- 1 root root 254, 1 Jul  8  2011 software
>  crw-rw---- 1 root root 254, 2 Jul  8  2011 tracepoint
> 
> But it should be done in /sys/.

I have to learn yet why /dev is bad and /sys is good...

The system topology is always in /sys, also for device nodes. But we
can't get a device file descriptor from /sys. I doubt /sys is capable
to handle a device use count (need to be checked). We actually must
grab the pmu while attaching events to it. And, user space
implementation is must easier with /dev (see code in my previous
mail).

My patch also includes code that creates a device class. It is also
visible in /sys/class/pmu/*.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] [PATCH] perf: Attaching an event to a specific PMU
  2011-07-06 16:53         ` Robert Richter
@ 2011-07-06 17:10           ` Ingo Molnar
  2011-07-06 17:14             ` Peter Zijlstra
  2011-07-07 10:22             ` Robert Richter
  2011-07-06 17:12           ` Peter Zijlstra
  1 sibling, 2 replies; 18+ messages in thread
From: Ingo Molnar @ 2011-07-06 17:10 UTC (permalink / raw)
  To: Robert Richter
  Cc: Peter Zijlstra, Paul Mackerras, Arnaldo Carvalho de Melo, linux-kernel


* Robert Richter <robert.richter@amd.com> wrote:

> On 05.07.11 05:12:52, Ingo Molnar wrote:
> > 
> > * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > 
> > > > Overall, my approach improves the perf design. It adds a better 
> > > > and more intuitve access to perf from user space with clear and 
> > > > common methods and interfaces. Please let me know the concerns 
> > > > you have.
> > > 
> > > Its redundant, this interface ship has sailed, its not going to 
> > > happen.
> > 
> > Even if we had the choice, i don't see how a /dev based enumeration 
> > of PMUs is in any way better than a topologically attached set of 
> > PMUs in /sys.
> > 
> > This kind of structure is nice in principle:
> > 
> >  # ls -l /dev/pmu/
> >  total 0
> >  crw-rw---- 1 root root 254, 5 Jul  8  2011 breakpoint
> >  crw-rw---- 1 root root 254, 4 Jul  8  2011 cpu
> >  crw-rw---- 1 root root 254, 6 Jul  8  2011 proto
> >  crw-rw---- 1 root root 254, 1 Jul  8  2011 software
> >  crw-rw---- 1 root root 254, 2 Jul  8  2011 tracepoint
> > 
> > But it should be done in /sys/.
> 
> I have to learn yet why /dev is bad and /sys is good...

Because /sys is already there and already carries rather rich 
classification of various hardware components, devices and kernel 
subsystems.

/dev/ is mostly a flat registry of classical, unstructured devices.

> The system topology is always in /sys, also for device nodes. But 
> we can't get a device file descriptor from /sys. I doubt /sys is 
> capable to handle a device use count (need to be checked). We 
> actually must grab the pmu while attaching events to it. And, user 
> space implementation is must easier with /dev (see code in my 
> previous mail).

I think Peter suggested it that an open() done in /sys should give us 
a handle to a given event?

> My patch also includes code that creates a device class. It is also 
> visible in /sys/class/pmu/*.

But PMU is a very limited term: what we want is a higher level 
organization of 'event sources' and 'events'. Some may come from 
PMUs, many wont.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] [PATCH] perf: Attaching an event to a specific PMU
  2011-07-06 16:53         ` Robert Richter
  2011-07-06 17:10           ` Ingo Molnar
@ 2011-07-06 17:12           ` Peter Zijlstra
  2011-07-07  9:21             ` Robert Richter
  1 sibling, 1 reply; 18+ messages in thread
From: Peter Zijlstra @ 2011-07-06 17:12 UTC (permalink / raw)
  To: Robert Richter
  Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo, linux-kernel

On Wed, 2011-07-06 at 18:53 +0200, Robert Richter wrote:
> On 05.07.11 05:12:52, Ingo Molnar wrote:
> > 
> > * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > 
> > > > Overall, my approach improves the perf design. It adds a better 
> > > > and more intuitve access to perf from user space with clear and 
> > > > common methods and interfaces. Please let me know the concerns 
> > > > you have.
> > > 
> > > Its redundant, this interface ship has sailed, its not going to 
> > > happen.
> > 
> > Even if we had the choice, i don't see how a /dev based enumeration 
> > of PMUs is in any way better than a topologically attached set of 
> > PMUs in /sys.
> > 
> > This kind of structure is nice in principle:
> > 
> >  # ls -l /dev/pmu/
> >  total 0
> >  crw-rw---- 1 root root 254, 5 Jul  8  2011 breakpoint
> >  crw-rw---- 1 root root 254, 4 Jul  8  2011 cpu
> >  crw-rw---- 1 root root 254, 6 Jul  8  2011 proto
> >  crw-rw---- 1 root root 254, 1 Jul  8  2011 software
> >  crw-rw---- 1 root root 254, 2 Jul  8  2011 tracepoint
> > 
> > But it should be done in /sys/.
> 
> I have to learn yet why /dev is bad and /sys is good...

Its not so much /dev is bad, as not quite suited for this.

> The system topology is always in /sys, also for device nodes. But we
> can't get a device file descriptor from /sys. I doubt /sys is capable
> to handle a device use count (need to be checked). We actually must
> grab the pmu while attaching events to it. And, user space
> implementation is must easier with /dev (see code in my previous
> mail).

Well you must not per-se, from a user's perspective there isn't much of
a difference between if the sys_perf_event_open() fails or if the
initial file open fails, in both cases he's not getting an event.

perf stat -e IBS:fetches will always have a fail against rmmod, rmmod
could complete before we try to open the file (assuming IBS is a
module).

> 
> My patch also includes code that creates a device class. It is also
> visible in /sys/class/pmu/*.

So not only are you providing a duplicate of existing interfaces, you're
actually duplicating information inside sysfs as well.



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] [PATCH] perf: Attaching an event to a specific PMU
  2011-07-06 17:10           ` Ingo Molnar
@ 2011-07-06 17:14             ` Peter Zijlstra
  2011-07-06 17:15               ` Ingo Molnar
  2011-07-07 10:22             ` Robert Richter
  1 sibling, 1 reply; 18+ messages in thread
From: Peter Zijlstra @ 2011-07-06 17:14 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Robert Richter, Paul Mackerras, Arnaldo Carvalho de Melo, linux-kernel

On Wed, 2011-07-06 at 19:10 +0200, Ingo Molnar wrote:
> 
> I think Peter suggested it that an open() done in /sys should give us 
> a handle to a given event? 

No, more like the open of a sysfs file could also grab a refcount on the
module. All it would take is a custom open/close method for sysfs files.
Now I have no clue how to do that, but that's another issue.

Also, I really don't see the point. With modular PMUs there's always
races against unload, just don't do that or live with it.



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] [PATCH] perf: Attaching an event to a specific PMU
  2011-07-06 17:14             ` Peter Zijlstra
@ 2011-07-06 17:15               ` Ingo Molnar
  0 siblings, 0 replies; 18+ messages in thread
From: Ingo Molnar @ 2011-07-06 17:15 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Robert Richter, Paul Mackerras, Arnaldo Carvalho de Melo, linux-kernel


* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:

> On Wed, 2011-07-06 at 19:10 +0200, Ingo Molnar wrote:
> > 
> > I think Peter suggested it that an open() done in /sys should 
> > give us a handle to a given event?
> 
> No, more like the open of a sysfs file could also grab a refcount 
> on the module. All it would take is a custom open/close method for 
> sysfs files. Now I have no clue how to do that, but that's another 
> issue.
> 
> Also, I really don't see the point. With modular PMUs there's 
> always races against unload, just don't do that or live with it.

I don't think it's a big issue either - module load/unloads are rare.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] [PATCH] perf: Attaching an event to a specific PMU
  2011-07-06 17:12           ` Peter Zijlstra
@ 2011-07-07  9:21             ` Robert Richter
  2011-07-07  9:39               ` Robert Richter
  2011-07-07 19:38               ` Peter Zijlstra
  0 siblings, 2 replies; 18+ messages in thread
From: Robert Richter @ 2011-07-07  9:21 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo, linux-kernel

On 06.07.11 13:12:48, Peter Zijlstra wrote:
> On Wed, 2011-07-06 at 18:53 +0200, Robert Richter wrote:

> > The system topology is always in /sys, also for device nodes. But we
> > can't get a device file descriptor from /sys. I doubt /sys is capable
> > to handle a device use count (need to be checked). We actually must
> > grab the pmu while attaching events to it. And, user space
> > implementation is must easier with /dev (see code in my previous
> > mail).
> 
> Well you must not per-se, from a user's perspective there isn't much of
> a difference between if the sys_perf_event_open() fails or if the
> initial file open fails, in both cases he's not getting an event.

It is not that I want to create event handles with open. For this
there is the syscall. I use open() to create a unique reference to a
pmu in userspace. No more, and /dev is the right thing to do this.

> perf stat -e IBS:fetches will always have a fail against rmmod, rmmod
> could complete before we try to open the file (assuming IBS is a
> module).
> 
> > 
> > My patch also includes code that creates a device class. It is also
> > visible in /sys/class/pmu/*.
> 
> So not only are you providing a duplicate of existing interfaces, you're
> actually duplicating information inside sysfs as well.

This is not duplication. /sys provides information about system
hierarchy, /dev is for controlling devices. Thus, every device node is
visible in /sys as it is part of the system.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] [PATCH] perf: Attaching an event to a specific PMU
  2011-07-07  9:21             ` Robert Richter
@ 2011-07-07  9:39               ` Robert Richter
  2011-07-07 19:38               ` Peter Zijlstra
  1 sibling, 0 replies; 18+ messages in thread
From: Robert Richter @ 2011-07-07  9:39 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo, linux-kernel

One more note:

> On 06.07.11 13:12:48, Peter Zijlstra wrote:

> > perf stat -e IBS:fetches will always have a fail against rmmod, rmmod
> > could complete before we try to open the file (assuming IBS is a
> > module).

If the module is busy, it wont be unloaded.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] [PATCH] perf: Attaching an event to a specific PMU
  2011-07-06 17:10           ` Ingo Molnar
  2011-07-06 17:14             ` Peter Zijlstra
@ 2011-07-07 10:22             ` Robert Richter
  1 sibling, 0 replies; 18+ messages in thread
From: Robert Richter @ 2011-07-07 10:22 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, Paul Mackerras, Arnaldo Carvalho de Melo, linux-kernel

On 06.07.11 13:10:15, Ingo Molnar wrote:
> 
> * Robert Richter <robert.richter@amd.com> wrote:

> > I have to learn yet why /dev is bad and /sys is good...
> 
> Because /sys is already there and already carries rather rich 
> classification of various hardware components, devices and kernel 
> subsystems.
> 
> /dev/ is mostly a flat registry of classical, unstructured devices.

As already said, /sys provides system information. Of course it is
richer then. But I also don't want/need to specify a pmu that deep in
the system hierarchy. And /dev is flat and easy. What of the following
would you prefer?

 /sys/devices/pci0000:00/0000:00:03.0/usb2/2-2/2-2:1.1/input/input4/mouse0
 /dev/input/mouse0

Beside of this, you can't control devices with /sys.

> > The system topology is always in /sys, also for device nodes. But 
> > we can't get a device file descriptor from /sys. I doubt /sys is 
> > capable to handle a device use count (need to be checked). We 
> > actually must grab the pmu while attaching events to it. And, user 
> > space implementation is must easier with /dev (see code in my 
> > previous mail).
> 
> I think Peter suggested it that an open() done in /sys should give us 
> a handle to a given event?

Using /dev solves the problems in a simple way, and simple things
often work very well. With open() we control the refcount and we then
get a device reference which is unique and immediately usable without
parsing strings. This makes user code small and intuitive and also
avoids complex config checks on the kernel side to catch wrongly
assigned events.

While we probably can live with such races now on static pmus,
thinking of future use cases like hotplugable graphic cards with pmus
in it, we actually want a clean design for this. I don't see how to do
this with sysfs. As there are no implementations yet for such an
interface, we now have the chance to design it properly.

> > My patch also includes code that creates a device class. It is also 
> > visible in /sys/class/pmu/*.
> 
> But PMU is a very limited term: what we want is a higher level 
> organization of 'event sources' and 'events'. Some may come from 
> PMUs, many wont.

You can see it as 'virtual' pmu, like the virtual file system.
Whatever the name for this is, 'event sources' is not very handy. But
don't want to change this.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] [PATCH] perf: Attaching an event to a specific PMU
  2011-07-07  9:21             ` Robert Richter
  2011-07-07  9:39               ` Robert Richter
@ 2011-07-07 19:38               ` Peter Zijlstra
  1 sibling, 0 replies; 18+ messages in thread
From: Peter Zijlstra @ 2011-07-07 19:38 UTC (permalink / raw)
  To: Robert Richter
  Cc: Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo, linux-kernel

On Thu, 2011-07-07 at 11:21 +0200, Robert Richter wrote:
> On 06.07.11 13:12:48, Peter Zijlstra wrote:
> > On Wed, 2011-07-06 at 18:53 +0200, Robert Richter wrote:
> 
> > > The system topology is always in /sys, also for device nodes. But we
> > > can't get a device file descriptor from /sys. I doubt /sys is capable
> > > to handle a device use count (need to be checked). We actually must
> > > grab the pmu while attaching events to it. And, user space
> > > implementation is must easier with /dev (see code in my previous
> > > mail).
> > 
> > Well you must not per-se, from a user's perspective there isn't much of
> > a difference between if the sys_perf_event_open() fails or if the
> > initial file open fails, in both cases he's not getting an event.
> 
> It is not that I want to create event handles with open. 

Never said you wanted to do that.

> For this
> there is the syscall. I use open() to create a unique reference to a
> pmu in userspace. 

Yeah, and its pointless.

Which ever way around you turn this problem it always ends up looking
like:

  fd = open("/sys/foo");
  event = sys_perf_event_open();
  close(fd);
    /* do stuff */
  close(event);

The pmu not going away between the open() and sys_perf_event_open() is
irrelevant. What is required is the success of both calls, up until that
happens its a fail.

> > perf stat -e IBS:fetches will always have a fail against rmmod, rmmod
> > could complete before we try to open the file (assuming IBS is a
> > module).
> > 
> > > 
> > > My patch also includes code that creates a device class. It is also
> > > visible in /sys/class/pmu/*.
> > 
> > So not only are you providing a duplicate of existing interfaces, you're
> > actually duplicating information inside sysfs as well.
> 
> This is not duplication. /sys provides information about system
> hierarchy, /dev is for controlling devices. Thus, every device node is
> visible in /sys as it is part of the system.

It is duplication, we already have a /sys representation of every pmu,
by adding /sys/class/pmu/ you add another, that's pure duplication.

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2011-07-07 19:43 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-07-03 15:04 [RFC] [PATCH] perf: Attaching an event to a specific PMU Robert Richter
2011-07-03 18:04 ` Peter Zijlstra
2011-07-04 17:59   ` Robert Richter
2011-07-05  8:51     ` Peter Zijlstra
2011-07-05  9:12       ` Ingo Molnar
2011-07-06 16:53         ` Robert Richter
2011-07-06 17:10           ` Ingo Molnar
2011-07-06 17:14             ` Peter Zijlstra
2011-07-06 17:15               ` Ingo Molnar
2011-07-07 10:22             ` Robert Richter
2011-07-06 17:12           ` Peter Zijlstra
2011-07-07  9:21             ` Robert Richter
2011-07-07  9:39               ` Robert Richter
2011-07-07 19:38               ` Peter Zijlstra
2011-07-05  9:47     ` [PATCH] perf: Extend attr check to allow also dynamically generated Robert Richter
2011-07-05 10:51       ` Peter Zijlstra
2011-07-05 10:56         ` Robert Richter
2011-07-05 10:53     ` [PATCH] perf: Extend attr check to allow also dynamically generated types Robert Richter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.