From: Marco Elver <elver@google.com>
To: elver@google.com, peterz@infradead.org,
alexander.shishkin@linux.intel.com, acme@kernel.org,
mingo@redhat.com, jolsa@redhat.com, mark.rutland@arm.com,
namhyung@kernel.org, tglx@linutronix.de
Cc: glider@google.com, viro@zeniv.linux.org.uk, arnd@arndb.de,
christian@brauner.io, dvyukov@google.com, jannh@google.com,
axboe@kernel.dk, mascasa@google.com, pcc@google.com,
irogers@google.com, kasan-dev@googlegroups.com,
linux-arch@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-kernel@vger.kernel.org, x86@kernel.org,
linux-kselftest@vger.kernel.org
Subject: [PATCH v3 01/11] perf: Rework perf_event_exit_event()
Date: Wed, 24 Mar 2021 12:24:53 +0100 [thread overview]
Message-ID: <20210324112503.623833-2-elver@google.com> (raw)
In-Reply-To: <20210324112503.623833-1-elver@google.com>
From: Peter Zijlstra <peterz@infradead.org>
Make perf_event_exit_event() more robust, such that we can use it from
other contexts. Specifically the up and coming remove_on_exec.
For this to work we need to address a few issues. Remove_on_exec will
not destroy the entire context, so we cannot rely on TASK_TOMBSTONE to
disable event_function_call() and we thus have to use
perf_remove_from_context().
When using perf_remove_from_context(), there's two races to consider.
The first is against close(), where we can have concurrent tear-down
of the event. The second is against child_list iteration, which should
not find a half baked event.
To address this, teach perf_remove_from_context() to special case
!ctx->is_active and about DETACH_CHILD.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Marco Elver <elver@google.com>
---
v3:
* New dependency for series:
https://lkml.kernel.org/r/YFn/I3aKF+TOjGcl@hirez.programming.kicks-ass.net
---
include/linux/perf_event.h | 1 +
kernel/events/core.c | 142 +++++++++++++++++++++----------------
2 files changed, 80 insertions(+), 63 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3f7f89ea5e51..3d478abf411c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -607,6 +607,7 @@ struct swevent_hlist {
#define PERF_ATTACH_TASK_DATA 0x08
#define PERF_ATTACH_ITRACE 0x10
#define PERF_ATTACH_SCHED_CB 0x20
+#define PERF_ATTACH_CHILD 0x40
struct perf_cgroup;
struct perf_buffer;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 03db40f6cba9..57de8d436efd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2204,6 +2204,26 @@ static void perf_group_detach(struct perf_event *event)
perf_event__header_size(leader);
}
+static void sync_child_event(struct perf_event *child_event);
+
+static void perf_child_detach(struct perf_event *event)
+{
+ struct perf_event *parent_event = event->parent;
+
+ if (!(event->attach_state & PERF_ATTACH_CHILD))
+ return;
+
+ event->attach_state &= ~PERF_ATTACH_CHILD;
+
+ if (WARN_ON_ONCE(!parent_event))
+ return;
+
+ lockdep_assert_held(&parent_event->child_mutex);
+
+ sync_child_event(event);
+ list_del_init(&event->child_list);
+}
+
static bool is_orphaned_event(struct perf_event *event)
{
return event->state == PERF_EVENT_STATE_DEAD;
@@ -2311,6 +2331,7 @@ group_sched_out(struct perf_event *group_event,
}
#define DETACH_GROUP 0x01UL
+#define DETACH_CHILD 0x02UL
/*
* Cross CPU call to remove a performance event
@@ -2334,6 +2355,8 @@ __perf_remove_from_context(struct perf_event *event,
event_sched_out(event, cpuctx, ctx);
if (flags & DETACH_GROUP)
perf_group_detach(event);
+ if (flags & DETACH_CHILD)
+ perf_child_detach(event);
list_del_event(event, ctx);
if (!ctx->nr_events && ctx->is_active) {
@@ -2362,25 +2385,21 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla
lockdep_assert_held(&ctx->mutex);
- event_function_call(event, __perf_remove_from_context, (void *)flags);
-
/*
- * The above event_function_call() can NO-OP when it hits
- * TASK_TOMBSTONE. In that case we must already have been detached
- * from the context (by perf_event_exit_event()) but the grouping
- * might still be in-tact.
+ * Because of perf_event_exit_task(), perf_remove_from_context() ought
+ * to work in the face of TASK_TOMBSTONE, unlike every other
+ * event_function_call() user.
*/
- WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
- if ((flags & DETACH_GROUP) &&
- (event->attach_state & PERF_ATTACH_GROUP)) {
- /*
- * Since in that case we cannot possibly be scheduled, simply
- * detach now.
- */
- raw_spin_lock_irq(&ctx->lock);
- perf_group_detach(event);
+ raw_spin_lock_irq(&ctx->lock);
+ if (!ctx->is_active) {
+ __perf_remove_from_context(event, __get_cpu_context(ctx),
+ ctx, (void *)flags);
raw_spin_unlock_irq(&ctx->lock);
+ return;
}
+ raw_spin_unlock_irq(&ctx->lock);
+
+ event_function_call(event, __perf_remove_from_context, (void *)flags);
}
/*
@@ -12373,14 +12392,17 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
}
EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
-static void sync_child_event(struct perf_event *child_event,
- struct task_struct *child)
+static void sync_child_event(struct perf_event *child_event)
{
struct perf_event *parent_event = child_event->parent;
u64 child_val;
- if (child_event->attr.inherit_stat)
- perf_event_read_event(child_event, child);
+ if (child_event->attr.inherit_stat) {
+ struct task_struct *task = child_event->ctx->task;
+
+ if (task)
+ perf_event_read_event(child_event, task);
+ }
child_val = perf_event_count(child_event);
@@ -12395,60 +12417,53 @@ static void sync_child_event(struct perf_event *child_event,
}
static void
-perf_event_exit_event(struct perf_event *child_event,
- struct perf_event_context *child_ctx,
- struct task_struct *child)
+perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
{
- struct perf_event *parent_event = child_event->parent;
+ struct perf_event *parent_event = event->parent;
+ unsigned long detach_flags = 0;
- /*
- * Do not destroy the 'original' grouping; because of the context
- * switch optimization the original events could've ended up in a
- * random child task.
- *
- * If we were to destroy the original group, all group related
- * operations would cease to function properly after this random
- * child dies.
- *
- * Do destroy all inherited groups, we don't care about those
- * and being thorough is better.
- */
- raw_spin_lock_irq(&child_ctx->lock);
- WARN_ON_ONCE(child_ctx->is_active);
+ if (parent_event) {
+ /*
+ * Do not destroy the 'original' grouping; because of the
+ * context switch optimization the original events could've
+ * ended up in a random child task.
+ *
+ * If we were to destroy the original group, all group related
+ * operations would cease to function properly after this
+ * random child dies.
+ *
+ * Do destroy all inherited groups, we don't care about those
+ * and being thorough is better.
+ */
+ detach_flags = DETACH_GROUP | DETACH_CHILD;
+ mutex_lock(&parent_event->child_mutex);
+ }
- if (parent_event)
- perf_group_detach(child_event);
- list_del_event(child_event, child_ctx);
- perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT); /* is_event_hup() */
- raw_spin_unlock_irq(&child_ctx->lock);
+ perf_remove_from_context(event, detach_flags);
+
+ raw_spin_lock_irq(&ctx->lock);
+ if (event->state > PERF_EVENT_STATE_EXIT)
+ perf_event_set_state(event, PERF_EVENT_STATE_EXIT);
+ raw_spin_unlock_irq(&ctx->lock);
/*
- * Parent events are governed by their filedesc, retain them.
+ * Child events can be freed.
*/
- if (!parent_event) {
- perf_event_wakeup(child_event);
+ if (parent_event) {
+ mutex_unlock(&parent_event->child_mutex);
+ /*
+ * Kick perf_poll() for is_event_hup();
+ */
+ perf_event_wakeup(parent_event);
+ free_event(event);
+ put_event(parent_event);
return;
}
- /*
- * Child events can be cleaned up.
- */
-
- sync_child_event(child_event, child);
/*
- * Remove this event from the parent's list
- */
- WARN_ON_ONCE(parent_event->ctx->parent_ctx);
- mutex_lock(&parent_event->child_mutex);
- list_del_init(&child_event->child_list);
- mutex_unlock(&parent_event->child_mutex);
-
- /*
- * Kick perf_poll() for is_event_hup().
+ * Parent events are governed by their filedesc, retain them.
*/
- perf_event_wakeup(parent_event);
- free_event(child_event);
- put_event(parent_event);
+ perf_event_wakeup(event);
}
static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
@@ -12505,7 +12520,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
perf_event_task(child, child_ctx, 0);
list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
- perf_event_exit_event(child_event, child_ctx, child);
+ perf_event_exit_event(child_event, child_ctx);
mutex_unlock(&child_ctx->mutex);
@@ -12765,6 +12780,7 @@ inherit_event(struct perf_event *parent_event,
*/
raw_spin_lock_irqsave(&child_ctx->lock, flags);
add_event_to_ctx(child_event, child_ctx);
+ child_event->attach_state |= PERF_ATTACH_CHILD;
raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
/*
--
2.31.0.291.g576ba9dcdaf-goog
next prev parent reply other threads:[~2021-03-24 11:26 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-24 11:24 [PATCH v3 00/11] Add support for synchronous signals on perf events Marco Elver
2021-03-24 11:24 ` Marco Elver [this message]
2021-03-25 10:17 ` [PATCH v3 01/11] perf: Rework perf_event_exit_event() Marco Elver
2021-03-25 16:17 ` Marco Elver
2021-03-25 19:10 ` Marco Elver
2021-03-29 11:50 ` Peter Zijlstra
2021-03-24 11:24 ` [PATCH v3 02/11] perf: Apply PERF_EVENT_IOC_MODIFY_ATTRIBUTES to children Marco Elver
2021-03-24 11:24 ` [PATCH v3 03/11] perf: Support only inheriting events if cloned with CLONE_THREAD Marco Elver
2021-03-24 11:24 ` [PATCH v3 04/11] perf: Add support for event removal on exec Marco Elver
2021-03-24 11:24 ` [PATCH v3 05/11] signal: Introduce TRAP_PERF si_code and si_perf to siginfo Marco Elver
2021-03-24 11:24 ` [PATCH v3 06/11] perf: Add support for SIGTRAP on perf events Marco Elver
2021-03-25 8:14 ` Marco Elver
2021-03-29 12:07 ` Peter Zijlstra
2021-03-29 14:27 ` Oleg Nesterov
2021-03-29 14:32 ` Marco Elver
2021-03-30 7:04 ` Peter Zijlstra
2021-03-29 18:22 ` Marco Elver
2021-03-29 18:33 ` Oleg Nesterov
2021-03-31 12:32 ` Marco Elver
2021-03-31 14:51 ` Peter Zijlstra
2021-03-31 16:50 ` Marco Elver
2021-03-24 11:24 ` [PATCH v3 07/11] perf: Add breakpoint information to siginfo on SIGTRAP Marco Elver
2021-03-24 12:53 ` Peter Zijlstra
2021-03-24 13:01 ` Peter Zijlstra
2021-03-24 13:21 ` Peter Zijlstra
2021-03-24 13:43 ` Peter Zijlstra
2021-03-24 14:00 ` Peter Zijlstra
2021-03-24 14:05 ` Marco Elver
2021-03-24 14:12 ` Dmitry Vyukov
2021-03-24 14:15 ` Dmitry Vyukov
2021-03-25 7:00 ` Marco Elver
2021-03-25 14:18 ` Ingo Molnar
2021-03-25 15:17 ` Marco Elver
2021-03-25 15:35 ` Ingo Molnar
2021-03-24 13:47 ` Marco Elver
2021-03-24 11:25 ` [PATCH v3 08/11] selftests/perf_events: Add kselftest for process-wide sigtrap handling Marco Elver
2021-03-24 11:25 ` [PATCH v3 09/11] selftests/perf_events: Add kselftest for remove_on_exec Marco Elver
2021-03-24 11:25 ` [PATCH v3 10/11] tools headers uapi: Sync tools/include/uapi/linux/perf_event.h Marco Elver
2021-03-24 11:25 ` [PATCH v3 11/11] perf test: Add basic stress test for sigtrap handling Marco Elver
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210324112503.623833-2-elver@google.com \
--to=elver@google.com \
--cc=acme@kernel.org \
--cc=alexander.shishkin@linux.intel.com \
--cc=arnd@arndb.de \
--cc=axboe@kernel.dk \
--cc=christian@brauner.io \
--cc=dvyukov@google.com \
--cc=glider@google.com \
--cc=irogers@google.com \
--cc=jannh@google.com \
--cc=jolsa@redhat.com \
--cc=kasan-dev@googlegroups.com \
--cc=linux-arch@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mascasa@google.com \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=pcc@google.com \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
--cc=viro@zeniv.linux.org.uk \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).