All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] perf: Remove orphaned children events
@ 2014-08-01 12:32 Jiri Olsa
  2014-08-01 12:33 ` [PATCH 1/3] perf: Do not allow to create kernel events without handler Jiri Olsa
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Jiri Olsa @ 2014-08-01 12:32 UTC (permalink / raw)
  To: linux-kernel
  Cc: Arnaldo Carvalho de Melo, Corey Ashford, Frederic Weisbecker,
	Mark Rutland, Paul Mackerras, Peter Zijlstra, Jiri Olsa

hi,
adding a support to remove orphaned children events, so they do not
eat resources once parent event is gone as discussed here:
  lkml.kernel.org/r/1405079782-8139-3-git-send-email-jolsa@kernel.org
and here:
  http://marc.info/?l=linux-kernel&m=140568679618148&w=2

Also reachable in here:
  git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git
  perf/core_orphans_removal

thanks,
jirka


Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/perf_event.h |   4 ++
 kernel/events/core.c       | 116 ++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 113 insertions(+), 7 deletions(-)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/3] perf: Do not allow to create kernel events without handler
  2014-08-01 12:32 [PATCH 0/3] perf: Remove orphaned children events Jiri Olsa
@ 2014-08-01 12:33 ` Jiri Olsa
  2014-08-01 14:56   ` Peter Zijlstra
  2014-08-01 12:33 ` [PATCH 2/3] perf: Set owner pointer for kernel events Jiri Olsa
  2014-08-01 12:33 ` [PATCH 3/3] perf: Add queued work to remove orphaned child events Jiri Olsa
  2 siblings, 1 reply; 8+ messages in thread
From: Jiri Olsa @ 2014-08-01 12:33 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jiri Olsa, Arnaldo Carvalho de Melo, Corey Ashford,
	Frederic Weisbecker, Mark Rutland, Paul Mackerras,
	Peter Zijlstra

Force kernel events to specify the handler, because
there's no use for kernel perf event without it.

Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 kernel/events/core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1cf24b3e42ec..75c4ff8d1590 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7355,6 +7355,9 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	struct perf_event *event;
 	int err;
 
+	if (WARN_ON(!overflow_handler))
+		return NULL;
+
 	/*
 	 * Get the target context (task or percpu):
 	 */
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/3] perf: Set owner pointer for kernel events
  2014-08-01 12:32 [PATCH 0/3] perf: Remove orphaned children events Jiri Olsa
  2014-08-01 12:33 ` [PATCH 1/3] perf: Do not allow to create kernel events without handler Jiri Olsa
@ 2014-08-01 12:33 ` Jiri Olsa
  2014-08-13  8:21   ` [tip:perf/core] " tip-bot for Jiri Olsa
  2014-08-01 12:33 ` [PATCH 3/3] perf: Add queued work to remove orphaned child events Jiri Olsa
  2 siblings, 1 reply; 8+ messages in thread
From: Jiri Olsa @ 2014-08-01 12:33 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jiri Olsa, Arnaldo Carvalho de Melo, Corey Ashford,
	Frederic Weisbecker, Mark Rutland, Paul Mackerras,
	Peter Zijlstra

Adding fake EVENT_OWNER_KERNEL owner pointer value for kernel perf
events, so we could distinguish it from user events, which needs
special care in following patch.

Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 kernel/events/core.c | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 75c4ff8d1590..c2fba5736405 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -119,6 +119,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
 	return data.ret;
 }
 
+#define EVENT_OWNER_KERNEL ((void *) -1)
+
+static bool is_kernel_event(struct perf_event *event)
+{
+	return event->owner == EVENT_OWNER_KERNEL;
+}
+
 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
 		       PERF_FLAG_FD_OUTPUT  |\
 		       PERF_FLAG_PID_CGROUP |\
@@ -3312,16 +3319,12 @@ static void free_event(struct perf_event *event)
 }
 
 /*
- * Called when the last reference to the file is gone.
+ * Remove user event from the owner task.
  */
-static void put_event(struct perf_event *event)
+static void perf_remove_from_owner(struct perf_event *event)
 {
-	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *owner;
 
-	if (!atomic_long_dec_and_test(&event->refcount))
-		return;
-
 	rcu_read_lock();
 	owner = ACCESS_ONCE(event->owner);
 	/*
@@ -3354,6 +3357,20 @@ static void put_event(struct perf_event *event)
 		mutex_unlock(&owner->perf_event_mutex);
 		put_task_struct(owner);
 	}
+}
+
+/*
+ * Called when the last reference to the file is gone.
+ */
+static void put_event(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+
+	if (!atomic_long_dec_and_test(&event->refcount))
+		return;
+
+	if (!is_kernel_event(event))
+		perf_remove_from_owner(event);
 
 	WARN_ON_ONCE(ctx->parent_ctx);
 	/*
@@ -7369,6 +7386,9 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		goto err;
 	}
 
+	/* Mark owner so we could distinguish it from user events. */
+	event->owner = EVENT_OWNER_KERNEL;
+
 	account_event(event);
 
 	ctx = find_get_context(event->pmu, task, cpu);
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/3] perf: Add queued work to remove orphaned child events
  2014-08-01 12:32 [PATCH 0/3] perf: Remove orphaned children events Jiri Olsa
  2014-08-01 12:33 ` [PATCH 1/3] perf: Do not allow to create kernel events without handler Jiri Olsa
  2014-08-01 12:33 ` [PATCH 2/3] perf: Set owner pointer for kernel events Jiri Olsa
@ 2014-08-01 12:33 ` Jiri Olsa
  2014-08-13  8:22   ` [tip:perf/core] " tip-bot for Jiri Olsa
  2 siblings, 1 reply; 8+ messages in thread
From: Jiri Olsa @ 2014-08-01 12:33 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jiri Olsa, Arnaldo Carvalho de Melo, Corey Ashford,
	Frederic Weisbecker, Mark Rutland, Paul Mackerras,
	Peter Zijlstra

In cases when the  owner task exits before the workload and the
workload made some forks, all the events stay in until the last
workload process exits. Thats' because each child event holds
parent reference.

We want to release all children events once the parent is gone,
because at that time there's no process to read them anyway, so
they're just eating resources.

This removal  races with process exit, which removes all events
and fork, which clone events.  To be clear of those two, adding
work queue to remove orphaned child for context in case such
event is detected.

Using delayed work queue (with delay == 1), because we queue this
work under perf scheduler callbacks. Normal work queue tries to wake
up the queue process, which deadlocks on rq->lock in this place.

Also preventing clones from abandoned parent event.

Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/perf_event.h |  4 +++
 kernel/events/core.c       | 87 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 707617a8c0f6..ef5b62bdb103 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -52,6 +52,7 @@ struct perf_guest_info_callbacks {
 #include <linux/atomic.h>
 #include <linux/sysfs.h>
 #include <linux/perf_regs.h>
+#include <linux/workqueue.h>
 #include <asm/local.h>
 
 struct perf_callchain_entry {
@@ -507,6 +508,9 @@ struct perf_event_context {
 	int				nr_cgroups;	 /* cgroup evts */
 	int				nr_branch_stack; /* branch_stack evt */
 	struct rcu_head			rcu_head;
+
+	struct delayed_work		orphans_remove;
+	bool				orphans_remove_sched;
 };
 
 /*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c2fba5736405..f412d9e20111 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -46,6 +46,8 @@
 
 #include <asm/irq_regs.h>
 
+static struct workqueue_struct *perf_wq;
+
 struct remote_function_call {
 	struct task_struct	*p;
 	int			(*func)(void *info);
@@ -1381,6 +1383,45 @@ out:
 		perf_event__header_size(tmp);
 }
 
+/*
+ * User event without the task.
+ */
+static bool is_orphaned_event(struct perf_event *event)
+{
+	return event && !is_kernel_event(event) && !event->owner;
+}
+
+/*
+ * Event has a parent but parent's task finished and it's
+ * alive only because of children holding refference.
+ */
+static bool is_orphaned_child(struct perf_event *event)
+{
+	return is_orphaned_event(event->parent);
+}
+
+static void orphans_remove_work(struct work_struct *work);
+
+static void schedule_orphans_remove(struct perf_event_context *ctx)
+{
+	if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
+		return;
+
+	if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
+		get_ctx(ctx);
+		ctx->orphans_remove_sched = true;
+	}
+}
+
+static int __init perf_workqueue_init(void)
+{
+	perf_wq = create_singlethread_workqueue("perf");
+	WARN(!perf_wq, "failed to create perf workqueue\n");
+	return perf_wq ? 0 : -1;
+}
+
+core_initcall(perf_workqueue_init);
+
 static inline int
 event_filter_match(struct perf_event *event)
 {
@@ -1430,6 +1471,9 @@ event_sched_out(struct perf_event *event,
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
 
+	if (is_orphaned_child(event))
+		schedule_orphans_remove(ctx);
+
 	perf_pmu_enable(event->pmu);
 }
 
@@ -1732,6 +1776,9 @@ event_sched_in(struct perf_event *event,
 	if (event->attr.exclusive)
 		cpuctx->exclusive = 1;
 
+	if (is_orphaned_child(event))
+		schedule_orphans_remove(ctx);
+
 out:
 	perf_pmu_enable(event->pmu);
 
@@ -3074,6 +3121,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
 	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
+	INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3405,6 +3453,42 @@ static int perf_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+/*
+ * Remove all orphanes events from the context.
+ */
+static void orphans_remove_work(struct work_struct *work)
+{
+	struct perf_event_context *ctx;
+	struct perf_event *event, *tmp;
+
+	ctx = container_of(work, struct perf_event_context,
+			   orphans_remove.work);
+
+	mutex_lock(&ctx->mutex);
+	list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
+		struct perf_event *parent_event = event->parent;
+
+		if (!is_orphaned_child(event))
+			continue;
+
+		perf_remove_from_context(event, true);
+
+		mutex_lock(&parent_event->child_mutex);
+		list_del_init(&event->child_list);
+		mutex_unlock(&parent_event->child_mutex);
+
+		free_event(event);
+		put_event(parent_event);
+	}
+
+	raw_spin_lock_irq(&ctx->lock);
+	ctx->orphans_remove_sched = false;
+	raw_spin_unlock_irq(&ctx->lock);
+	mutex_unlock(&ctx->mutex);
+
+	put_ctx(ctx);
+}
+
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
@@ -7712,7 +7796,8 @@ inherit_event(struct perf_event *parent_event,
 	if (IS_ERR(child_event))
 		return child_event;
 
-	if (!atomic_long_inc_not_zero(&parent_event->refcount)) {
+	if (is_orphaned_event(parent_event) ||
+	    !atomic_long_inc_not_zero(&parent_event->refcount)) {
 		free_event(child_event);
 		return NULL;
 	}
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/3] perf: Do not allow to create kernel events without handler
  2014-08-01 12:33 ` [PATCH 1/3] perf: Do not allow to create kernel events without handler Jiri Olsa
@ 2014-08-01 14:56   ` Peter Zijlstra
  2014-08-01 15:19     ` Jiri Olsa
  0 siblings, 1 reply; 8+ messages in thread
From: Peter Zijlstra @ 2014-08-01 14:56 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Corey Ashford,
	Frederic Weisbecker, Mark Rutland, Paul Mackerras

[-- Attachment #1: Type: text/plain, Size: 5026 bytes --]

On Fri, Aug 01, 2014 at 02:33:00PM +0200, Jiri Olsa wrote:
> Force kernel events to specify the handler, because
> there's no use for kernel perf event without it.
> 

I think I found a reason; although there is currently no such user, the
simple counting events, they don't have overflow handlers at all.

I think I did on once, but never merged that code because it was a quick
dev hack to create nice changelog numbers etc..

/me goes dig... found it:


---
 include/linux/perf_event.h |    1 
 kernel/events/core.c       |   22 +++++++-
 kernel/sched/clock.c       |  118 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 138 insertions(+), 3 deletions(-)

--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -561,6 +561,7 @@ extern void perf_pmu_migrate_context(str
 				int src_cpu, int dst_cpu);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
+extern u64 perf_event_read(struct perf_event *event);
 
 
 struct perf_sample_data {
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2973,15 +2973,31 @@ static inline u64 perf_event_count(struc
 	return local64_read(&event->count) + atomic64_read(&event->child_count);
 }
 
-static u64 perf_event_read(struct perf_event *event)
+u64 perf_event_read(struct perf_event *event)
 {
 	/*
 	 * If event is enabled and currently active on a CPU, update the
 	 * value in the event structure:
 	 */
 	if (event->state == PERF_EVENT_STATE_ACTIVE) {
-		smp_call_function_single(event->oncpu,
-					 __perf_event_read, event, 1);
+		/*
+		 * If the event is for the current task, its guaranteed that we
+		 * never need the cross cpu call, and therefore can allow this
+		 * to be called with IRQs disabled.
+		 *
+		 * Avoids the warning otherwise generated by
+		 * smp_call_function_single().
+		 */
+		if (event->ctx->task == current) {
+			unsigned long flags;
+
+			local_irq_save(flags);
+			__perf_event_read(event);
+			local_irq_restore(flags);
+		} else {
+			smp_call_function_single(event->oncpu,
+					__perf_event_read, event, 1);
+		}
 	} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
 		struct perf_event_context *ctx = event->ctx;
 		unsigned long flags;
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -387,3 +387,121 @@ u64 local_clock(void)
 
 EXPORT_SYMBOL_GPL(cpu_clock);
 EXPORT_SYMBOL_GPL(local_clock);
+
+#include <linux/perf_event.h>
+
+static char sched_clock_cache[12*1024*1024]; /* 12M l3 cache */
+static struct perf_event *__sched_clock_cycles;
+
+static __init u64 sched_clock_cycles(void)
+{
+	return perf_event_read(__sched_clock_cycles);
+}
+
+static __init noinline void sched_clock_wipe_cache(void)
+{
+	int i;
+
+	for (i = 0; i < sizeof(sched_clock_cache); i++)
+		ACCESS_ONCE(sched_clock_cache[i]) = 0;
+}
+
+static __always_inline u64 cache_cold_clock(u64 (*clock)(void))
+{
+	u64 cycles;
+
+	local_irq_disable();
+	sched_clock_wipe_cache();
+	cycles = sched_clock_cycles();
+	(void)clock();
+	cycles = sched_clock_cycles() - cycles;
+	local_irq_enable();
+
+	return cycles;
+}
+
+static __init void do_bench(void)
+{
+	u64 cycles;
+	u64 tmp;
+	int i;
+
+	printk("sched_clock_stable: %d\n", sched_clock_stable);
+
+	cycles = 0;
+	for (i = 0; i < 1000; i++)
+		cycles += cache_cold_clock(&sched_clock);
+
+	printk("(cold) sched_clock: %lu\n", cycles);
+
+	cycles = 0;
+	for (i = 0; i < 1000; i++)
+		cycles += cache_cold_clock(&local_clock);
+
+	printk("(cold) local_clock: %lu\n", cycles);
+
+	local_irq_disable();
+	ACCESS_ONCE(tmp) = sched_clock();
+
+	cycles = sched_clock_cycles();
+
+	for (i = 0; i < 1000; i++)
+		ACCESS_ONCE(tmp) = sched_clock();
+
+	cycles = sched_clock_cycles() - cycles;
+	local_irq_enable();
+
+	printk("(warm) sched_clock: %lu\n", cycles);
+
+	local_irq_disable();
+	ACCESS_ONCE(tmp) = local_clock();
+
+	cycles = sched_clock_cycles();
+
+	for (i = 0; i < 1000; i++)
+		ACCESS_ONCE(tmp) = local_clock();
+
+	cycles = sched_clock_cycles() - cycles;
+	local_irq_enable();
+
+	printk("(warm) local_clock: %lu\n", cycles);
+
+	local_irq_disable();
+	rdtscll(ACCESS_ONCE(tmp));
+
+	cycles = sched_clock_cycles();
+
+	for (i = 0; i < 1000; i++)
+		rdtscll(ACCESS_ONCE(tmp));
+
+	cycles = sched_clock_cycles() - cycles;
+	local_irq_enable();
+
+	printk("(warm) rdtsc: %lu\n", cycles);
+}
+
+static __init int sched_clock_bench(void)
+{
+	struct perf_event_attr perf_attr = {
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+		.size = sizeof(struct perf_event_attr),
+		.pinned = 1,
+	};
+
+	__sched_clock_cycles = perf_event_create_kernel_counter(&perf_attr, -1, current, NULL, NULL);
+
+	sched_clock_stable = 1;
+	do_bench();
+
+	sched_clock_stable = 0;
+	do_bench();
+
+	sched_clock_stable = 1;
+
+	perf_event_release_kernel(__sched_clock_cycles);
+
+	return 0;
+}
+
+late_initcall(sched_clock_bench);

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/3] perf: Do not allow to create kernel events without handler
  2014-08-01 14:56   ` Peter Zijlstra
@ 2014-08-01 15:19     ` Jiri Olsa
  0 siblings, 0 replies; 8+ messages in thread
From: Jiri Olsa @ 2014-08-01 15:19 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jiri Olsa, linux-kernel, Arnaldo Carvalho de Melo, Corey Ashford,
	Frederic Weisbecker, Mark Rutland, Paul Mackerras

On Fri, Aug 01, 2014 at 04:56:37PM +0200, Peter Zijlstra wrote:
> On Fri, Aug 01, 2014 at 02:33:00PM +0200, Jiri Olsa wrote:
> > Force kernel events to specify the handler, because
> > there's no use for kernel perf event without it.
> > 
> 
> I think I found a reason; although there is currently no such user, the
> simple counting events, they don't have overflow handlers at all.

ah right.. ok 

jirka

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [tip:perf/core] perf: Set owner pointer for kernel events
  2014-08-01 12:33 ` [PATCH 2/3] perf: Set owner pointer for kernel events Jiri Olsa
@ 2014-08-13  8:21   ` tip-bot for Jiri Olsa
  0 siblings, 0 replies; 8+ messages in thread
From: tip-bot for Jiri Olsa @ 2014-08-13  8:21 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, paulus, hpa, mingo, jolsa, torvalds, peterz,
	mark.rutland, acme, fweisbec, tglx, cjashfor

Commit-ID:  f86977620ee4635f26befcf436700493a38ce002
Gitweb:     http://git.kernel.org/tip/f86977620ee4635f26befcf436700493a38ce002
Author:     Jiri Olsa <jolsa@kernel.org>
AuthorDate: Fri, 1 Aug 2014 14:33:01 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 13 Aug 2014 07:51:03 +0200

perf: Set owner pointer for kernel events

Adding fake EVENT_OWNER_KERNEL owner pointer value for kernel perf
events, so we could distinguish it from user events, which needs
special care in following patch.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1406896382-18404-3-git-send-email-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1cf24b3..bbb3ca2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -119,6 +119,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
 	return data.ret;
 }
 
+#define EVENT_OWNER_KERNEL ((void *) -1)
+
+static bool is_kernel_event(struct perf_event *event)
+{
+	return event->owner == EVENT_OWNER_KERNEL;
+}
+
 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
 		       PERF_FLAG_FD_OUTPUT  |\
 		       PERF_FLAG_PID_CGROUP |\
@@ -3312,16 +3319,12 @@ static void free_event(struct perf_event *event)
 }
 
 /*
- * Called when the last reference to the file is gone.
+ * Remove user event from the owner task.
  */
-static void put_event(struct perf_event *event)
+static void perf_remove_from_owner(struct perf_event *event)
 {
-	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *owner;
 
-	if (!atomic_long_dec_and_test(&event->refcount))
-		return;
-
 	rcu_read_lock();
 	owner = ACCESS_ONCE(event->owner);
 	/*
@@ -3354,6 +3357,20 @@ static void put_event(struct perf_event *event)
 		mutex_unlock(&owner->perf_event_mutex);
 		put_task_struct(owner);
 	}
+}
+
+/*
+ * Called when the last reference to the file is gone.
+ */
+static void put_event(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+
+	if (!atomic_long_dec_and_test(&event->refcount))
+		return;
+
+	if (!is_kernel_event(event))
+		perf_remove_from_owner(event);
 
 	WARN_ON_ONCE(ctx->parent_ctx);
 	/*
@@ -7366,6 +7383,9 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		goto err;
 	}
 
+	/* Mark owner so we could distinguish it from user events. */
+	event->owner = EVENT_OWNER_KERNEL;
+
 	account_event(event);
 
 	ctx = find_get_context(event->pmu, task, cpu);

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [tip:perf/core] perf: Add queued work to remove orphaned child events
  2014-08-01 12:33 ` [PATCH 3/3] perf: Add queued work to remove orphaned child events Jiri Olsa
@ 2014-08-13  8:22   ` tip-bot for Jiri Olsa
  0 siblings, 0 replies; 8+ messages in thread
From: tip-bot for Jiri Olsa @ 2014-08-13  8:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, paulus, hpa, mingo, jolsa, torvalds, peterz,
	mark.rutland, acme, fweisbec, tglx, cjashfor

Commit-ID:  fadfe7be6e50de7f03913833b33c56cd8fb66bac
Gitweb:     http://git.kernel.org/tip/fadfe7be6e50de7f03913833b33c56cd8fb66bac
Author:     Jiri Olsa <jolsa@kernel.org>
AuthorDate: Fri, 1 Aug 2014 14:33:02 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 13 Aug 2014 07:51:04 +0200

perf: Add queued work to remove orphaned child events

In cases when the  owner task exits before the workload and the
workload made some forks, all the events stay in until the last
workload process exits. Thats' because each child event holds
parent reference.

We want to release all children events once the parent is gone,
because at that time there's no process to read them anyway, so
they're just eating resources.

This removal  races with process exit, which removes all events
and fork, which clone events.  To be clear of those two, adding
work queue to remove orphaned child for context in case such
event is detected.

Using delayed work queue (with delay == 1), because we queue this
work under perf scheduler callbacks. Normal work queue tries to wake
up the queue process, which deadlocks on rq->lock in this place.

Also preventing clones from abandoned parent event.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1406896382-18404-4-git-send-email-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h |  4 +++
 kernel/events/core.c       | 87 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 707617a..ef5b62b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -52,6 +52,7 @@ struct perf_guest_info_callbacks {
 #include <linux/atomic.h>
 #include <linux/sysfs.h>
 #include <linux/perf_regs.h>
+#include <linux/workqueue.h>
 #include <asm/local.h>
 
 struct perf_callchain_entry {
@@ -507,6 +508,9 @@ struct perf_event_context {
 	int				nr_cgroups;	 /* cgroup evts */
 	int				nr_branch_stack; /* branch_stack evt */
 	struct rcu_head			rcu_head;
+
+	struct delayed_work		orphans_remove;
+	bool				orphans_remove_sched;
 };
 
 /*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bbb3ca2..a254605 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -46,6 +46,8 @@
 
 #include <asm/irq_regs.h>
 
+static struct workqueue_struct *perf_wq;
+
 struct remote_function_call {
 	struct task_struct	*p;
 	int			(*func)(void *info);
@@ -1381,6 +1383,45 @@ out:
 		perf_event__header_size(tmp);
 }
 
+/*
+ * User event without the task.
+ */
+static bool is_orphaned_event(struct perf_event *event)
+{
+	return event && !is_kernel_event(event) && !event->owner;
+}
+
+/*
+ * Event has a parent but parent's task finished and it's
+ * alive only because of children holding refference.
+ */
+static bool is_orphaned_child(struct perf_event *event)
+{
+	return is_orphaned_event(event->parent);
+}
+
+static void orphans_remove_work(struct work_struct *work);
+
+static void schedule_orphans_remove(struct perf_event_context *ctx)
+{
+	if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
+		return;
+
+	if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
+		get_ctx(ctx);
+		ctx->orphans_remove_sched = true;
+	}
+}
+
+static int __init perf_workqueue_init(void)
+{
+	perf_wq = create_singlethread_workqueue("perf");
+	WARN(!perf_wq, "failed to create perf workqueue\n");
+	return perf_wq ? 0 : -1;
+}
+
+core_initcall(perf_workqueue_init);
+
 static inline int
 event_filter_match(struct perf_event *event)
 {
@@ -1430,6 +1471,9 @@ event_sched_out(struct perf_event *event,
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
 
+	if (is_orphaned_child(event))
+		schedule_orphans_remove(ctx);
+
 	perf_pmu_enable(event->pmu);
 }
 
@@ -1732,6 +1776,9 @@ event_sched_in(struct perf_event *event,
 	if (event->attr.exclusive)
 		cpuctx->exclusive = 1;
 
+	if (is_orphaned_child(event))
+		schedule_orphans_remove(ctx);
+
 out:
 	perf_pmu_enable(event->pmu);
 
@@ -3074,6 +3121,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
 	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
+	INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3405,6 +3453,42 @@ static int perf_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+/*
+ * Remove all orphanes events from the context.
+ */
+static void orphans_remove_work(struct work_struct *work)
+{
+	struct perf_event_context *ctx;
+	struct perf_event *event, *tmp;
+
+	ctx = container_of(work, struct perf_event_context,
+			   orphans_remove.work);
+
+	mutex_lock(&ctx->mutex);
+	list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
+		struct perf_event *parent_event = event->parent;
+
+		if (!is_orphaned_child(event))
+			continue;
+
+		perf_remove_from_context(event, true);
+
+		mutex_lock(&parent_event->child_mutex);
+		list_del_init(&event->child_list);
+		mutex_unlock(&parent_event->child_mutex);
+
+		free_event(event);
+		put_event(parent_event);
+	}
+
+	raw_spin_lock_irq(&ctx->lock);
+	ctx->orphans_remove_sched = false;
+	raw_spin_unlock_irq(&ctx->lock);
+	mutex_unlock(&ctx->mutex);
+
+	put_ctx(ctx);
+}
+
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
@@ -7709,7 +7793,8 @@ inherit_event(struct perf_event *parent_event,
 	if (IS_ERR(child_event))
 		return child_event;
 
-	if (!atomic_long_inc_not_zero(&parent_event->refcount)) {
+	if (is_orphaned_event(parent_event) ||
+	    !atomic_long_inc_not_zero(&parent_event->refcount)) {
 		free_event(child_event);
 		return NULL;
 	}

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2014-08-13  8:23 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-01 12:32 [PATCH 0/3] perf: Remove orphaned children events Jiri Olsa
2014-08-01 12:33 ` [PATCH 1/3] perf: Do not allow to create kernel events without handler Jiri Olsa
2014-08-01 14:56   ` Peter Zijlstra
2014-08-01 15:19     ` Jiri Olsa
2014-08-01 12:33 ` [PATCH 2/3] perf: Set owner pointer for kernel events Jiri Olsa
2014-08-13  8:21   ` [tip:perf/core] " tip-bot for Jiri Olsa
2014-08-01 12:33 ` [PATCH 3/3] perf: Add queued work to remove orphaned child events Jiri Olsa
2014-08-13  8:22   ` [tip:perf/core] " tip-bot for Jiri Olsa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.