From: Stephane Eranian <eranian@google.com>
To: linux-kernel@vger.kernel.org
Cc: peterz@infradead.org, mingo@elte.hu, ak@linux.intel.com,
edumazet@google.com, acme@redhat.com
Subject: [PATCH v2] perf/core: fix RCU issues with cgroup monitoring mode
Date: Tue, 27 Oct 2015 20:25:26 +0100 [thread overview]
Message-ID: <20151027192526.GA32018@thinkpad> (raw)
This patch eliminates all known RCU violations detected
by the RCU checker (PROVE_RCU). The impacted code paths
were all related to cgroup mode monitoring and involved
accessing a task's cgrp.
V2 is updated to include suggestions from PeterZ to eliminate
some of the warnings without taking rcu_read_lock(), because
we know we are already holding ctx->lock, which prevents
the cgroup from disappearing while we are accessing it.
The trick, as suggested by Peter, is to modify
perf_cgroup_from_task() to take an extra boolean parameter
that allows bypassing the lockdep test in the task_css_check()
macro. This patch uses this approach and updates all callers of
perf_cgroup_from_task().
V2 Patch relative to:
8b3c8e6 Revert "rculist: Make list_entry_rcu() use lockless_dereference()"
Signed-off-by: Stephane Eranian <eranian@google.com>
---
arch/x86/kernel/cpu/perf_event_intel_cqm.c | 2 +-
include/linux/perf_event.h | 4 ++--
kernel/events/core.c | 27 +++++++++++++++++----------
3 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 377e8f8..d96bbf1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -298,7 +298,7 @@ static bool __match_event(struct perf_event *a, struct perf_event *b)
static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
{
if (event->attach_state & PERF_ATTACH_TASK)
- return perf_cgroup_from_task(event->hw.target);
+ return perf_cgroup_from_task(event->hw.target, false);
return event->cgrp;
}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d841d33..24f3539 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -697,9 +697,9 @@ struct perf_cgroup {
* if there is no cgroup event for the current CPU context.
*/
static inline struct perf_cgroup *
-perf_cgroup_from_task(struct task_struct *task)
+perf_cgroup_from_task(struct task_struct *task, bool safe)
{
- return container_of(task_css(task, perf_event_cgrp_id),
+ return container_of(task_css_check(task, perf_event_cgrp_id, safe),
struct perf_cgroup, css);
}
#endif /* CONFIG_CGROUP_PERF */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ea02109..2003240 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -435,7 +435,8 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
if (!is_cgroup_event(event))
return;
- cgrp = perf_cgroup_from_task(current);
+ /* holding ctx->lock, so cgroup access is safe */
+ cgrp = perf_cgroup_from_task(current, true);
/*
* Do not update time when cgroup is not active
*/
@@ -458,7 +459,8 @@ perf_cgroup_set_timestamp(struct task_struct *task,
if (!task || !ctx->nr_cgroups)
return;
- cgrp = perf_cgroup_from_task(task);
+ /* holding ctx->lock, so cgroup access is safe */
+ cgrp = perf_cgroup_from_task(task, true);
info = this_cpu_ptr(cgrp->info);
info->timestamp = ctx->timestamp;
}
@@ -489,7 +491,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
* we reschedule only in the presence of cgroup
* constrained events.
*/
- rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
@@ -523,7 +524,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
* event_filter_match() to not have to pass
* task around
*/
- cpuctx->cgrp = perf_cgroup_from_task(task);
+ cpuctx->cgrp = perf_cgroup_from_task(task, false);
cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
}
perf_pmu_enable(cpuctx->ctx.pmu);
@@ -531,8 +532,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
}
}
- rcu_read_unlock();
-
local_irq_restore(flags);
}
@@ -542,17 +541,18 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
struct perf_cgroup *cgrp1;
struct perf_cgroup *cgrp2 = NULL;
+ rcu_read_lock();
/*
* we come here when we know perf_cgroup_events > 0
*/
- cgrp1 = perf_cgroup_from_task(task);
+ cgrp1 = perf_cgroup_from_task(task, false);
/*
* next is NULL when called from perf_event_enable_on_exec()
* that will systematically cause a cgroup_switch()
*/
if (next)
- cgrp2 = perf_cgroup_from_task(next);
+ cgrp2 = perf_cgroup_from_task(next, false);
/*
* only schedule out current cgroup events if we know
@@ -561,6 +561,8 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
*/
if (cgrp1 != cgrp2)
perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
+
+ rcu_read_unlock();
}
static inline void perf_cgroup_sched_in(struct task_struct *prev,
@@ -569,13 +571,14 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
struct perf_cgroup *cgrp1;
struct perf_cgroup *cgrp2 = NULL;
+ rcu_read_lock();
/*
* we come here when we know perf_cgroup_events > 0
*/
- cgrp1 = perf_cgroup_from_task(task);
+ cgrp1 = perf_cgroup_from_task(task, false);
/* prev can never be NULL */
- cgrp2 = perf_cgroup_from_task(prev);
+ cgrp2 = perf_cgroup_from_task(prev, false);
/*
* only need to schedule in cgroup events if we are changing
@@ -584,6 +587,8 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
*/
if (cgrp1 != cgrp2)
perf_cgroup_switch(task, PERF_CGROUP_SWIN);
+
+ rcu_read_unlock();
}
static inline int perf_cgroup_connect(int fd, struct perf_event *event,
@@ -9442,7 +9447,9 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
static int __perf_cgroup_move(void *info)
{
struct task_struct *task = info;
+ rcu_read_lock();
perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
+ rcu_read_unlock();
return 0;
}
--
2.1.4
next reply other threads:[~2015-10-27 19:25 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-10-27 19:25 Stephane Eranian [this message]
2015-10-27 20:24 ` [PATCH v2] perf/core: fix RCU issues with cgroup monitoring mode Eric Dumazet
2015-10-27 22:34 ` Peter Zijlstra
2015-10-27 23:37 ` Stephane Eranian
2015-10-28 0:17 ` Eric Dumazet
2015-10-28 0:27 ` Stephane Eranian
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20151027192526.GA32018@thinkpad \
--to=eranian@google.com \
--cc=acme@redhat.com \
--cc=ak@linux.intel.com \
--cc=edumazet@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).