* [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
@ 2020-12-02 15:02 Namhyung Kim
  2020-12-02 15:02 ` [RFC 2/2] perf tools: Add 'cgroup-switches' software event Namhyung Kim
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Namhyung Kim @ 2020-12-02 15:02 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar, Peter Zijlstra
  Cc: Mark Rutland, Alexander Shishkin, LKML, Stephane Eranian,
	Ian Rogers, Andi Kleen

This patch adds a new software event to count context switches that
also involve a cgroup switch, i.e. it is counted only when the
cgroups of the previous and next tasks are different.

One can argue that this is already possible with the existing
sched_switch event and eBPF, but some systems do not have eBPF
available, so I'd like to add this as a simple alternative.
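
For reference, a minimal user-space sketch of counting the new event
(illustration only, not part of this patch; it assumes the updated
uapi header, otherwise use the raw value 11 for config; error
handling is mostly omitted):

	#include <linux/perf_event.h>
	#include <string.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	int main(void)
	{
		struct perf_event_attr attr;
		long long count = 0;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_SOFTWARE;
		attr.config = PERF_COUNT_SW_CGROUP_SWITCHES;

		/* count for the calling task, on any CPU */
		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0)
			return 1;

		sleep(1);
		read(fd, &count, sizeof(count));
		printf("cgroup-switches: %lld\n", count);
		close(fd);
		return 0;
	}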

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 include/linux/perf_event.h      | 22 ++++++++++++++++++++++
 include/uapi/linux/perf_event.h |  1 +
 2 files changed, 23 insertions(+)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9a38f579bc76..d6dec422ba03 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1185,6 +1185,27 @@ perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
 	}
 }
 
+#ifdef CONFIG_CGROUP_PERF
+static inline void
+perf_sw_event_cgroup_switch(struct task_struct *prev, struct task_struct *next)
+{
+	struct cgroup *prev_cgrp, *next_cgrp;
+
+	rcu_read_lock();
+
+	prev_cgrp = task_css_check(prev, perf_event_cgrp_id, 1)->cgroup;
+	next_cgrp = task_css_check(next, perf_event_cgrp_id, 1)->cgroup;
+
+	if (prev_cgrp != next_cgrp)
+		perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
+
+	rcu_read_unlock();
+}
+#else
+static inline void perf_sw_event_cgroup_switch(struct task_struct *prev,
+					       struct task_struct *next) {}
+#endif  /* CONFIG_CGROUP_PERF */
+
 extern struct static_key_false perf_sched_events;
 
 static __always_inline bool
@@ -1220,6 +1241,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
 					     struct task_struct *next)
 {
 	perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
+	perf_sw_event_cgroup_switch(prev, next);
 
 	if (static_branch_unlikely(&perf_sched_events))
 		__perf_event_task_sched_out(prev, next);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b15e3447cd9f..16b9538ad89b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -112,6 +112,7 @@ enum perf_sw_ids {
 	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 	PERF_COUNT_SW_DUMMY			= 9,
 	PERF_COUNT_SW_BPF_OUTPUT		= 10,
+	PERF_COUNT_SW_CGROUP_SWITCHES		= 11,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
-- 
2.29.2.454.gaff20da3a2-goog



* [RFC 2/2] perf tools: Add 'cgroup-switches' software event
  2020-12-02 15:02 [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Namhyung Kim
@ 2020-12-02 15:02 ` Namhyung Kim
  2020-12-02 16:19 ` [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Peter Zijlstra
  2020-12-02 19:28 ` Andi Kleen
  2 siblings, 0 replies; 14+ messages in thread
From: Namhyung Kim @ 2020-12-02 15:02 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar, Peter Zijlstra
  Cc: Mark Rutland, Alexander Shishkin, LKML, Stephane Eranian,
	Ian Rogers, Andi Kleen

It counts how often the cgroup actually changes during a context
switch.

  # perf stat -a -e context-switches,cgroup-switches sleep 1

   Performance counter stats for 'system wide':

              11,267      context-switches
              10,950      cgroup-switches

         1.015634369 seconds time elapsed

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/include/uapi/linux/perf_event.h | 1 +
 tools/perf/util/parse-events.c        | 4 ++++
 tools/perf/util/parse-events.l        | 1 +
 3 files changed, 6 insertions(+)

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index b95d3c485d27..16559703c49c 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -112,6 +112,7 @@ enum perf_sw_ids {
 	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 	PERF_COUNT_SW_DUMMY			= 9,
 	PERF_COUNT_SW_BPF_OUTPUT		= 10,
+	PERF_COUNT_SW_CGROUP_SWITCHES		= 11,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 3b273580fb84..f6a5a099e143 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -145,6 +145,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
 		.symbol = "bpf-output",
 		.alias  = "",
 	},
+	[PERF_COUNT_SW_CGROUP_SWITCHES] = {
+		.symbol = "cgroup-switches",
+		.alias  = "",
+	},
 };
 
 #define __PERF_EVENT_FIELD(config, name) \
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 9db5097317f4..88f203bb6fab 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -347,6 +347,7 @@ emulation-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EM
 dummy						{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
 duration_time					{ return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
 bpf-output					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
+cgroup-switches					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
 
 	/*
 	 * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.
-- 
2.29.2.454.gaff20da3a2-goog



* Re: [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
  2020-12-02 15:02 [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Namhyung Kim
  2020-12-02 15:02 ` [RFC 2/2] perf tools: Add 'cgroup-switches' software event Namhyung Kim
@ 2020-12-02 16:19 ` Peter Zijlstra
  2020-12-03  1:05   ` Namhyung Kim
  2020-12-03  2:10   ` Namhyung Kim
  2020-12-02 19:28 ` Andi Kleen
  2 siblings, 2 replies; 14+ messages in thread
From: Peter Zijlstra @ 2020-12-02 16:19 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar, Mark Rutland,
	Alexander Shishkin, LKML, Stephane Eranian, Ian Rogers,
	Andi Kleen

On Thu, Dec 03, 2020 at 12:02:04AM +0900, Namhyung Kim wrote:

> +#ifdef CONFIG_CGROUP_PERF
> +static inline void
> +perf_sw_event_cgroup_switch(struct task_struct *prev, struct task_struct *next)
> +{
> +	struct cgroup *prev_cgrp, *next_cgrp;
> +
> +	rcu_read_lock();
> +
> +	prev_cgrp = task_css_check(prev, perf_event_cgrp_id, 1)->cgroup;
> +	next_cgrp = task_css_check(next, perf_event_cgrp_id, 1)->cgroup;
> +
> +	if (prev_cgrp != next_cgrp)
> +		perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
> +
> +	rcu_read_unlock();
> +}
> +#else
> +static inline void perf_sw_event_cgroup_switch(struct task_struct *prev,
> +					       struct task_struct *next) {}
> +#endif  /* CONFIG_CGROUP_PERF */
> +
>  extern struct static_key_false perf_sched_events;
>  
>  static __always_inline bool
> @@ -1220,6 +1241,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
>  					     struct task_struct *next)
>  {
>  	perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
> +	perf_sw_event_cgroup_switch(prev, next);
>  
>  	if (static_branch_unlikely(&perf_sched_events))
>  		__perf_event_task_sched_out(prev, next);

Urgh.. that's horrible, try something like this.

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9a38f579bc76..5eb284819ee5 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1174,25 +1174,19 @@ DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
  * which is guaranteed by us not actually scheduling inside other swevents
  * because those disable preemption.
  */
-static __always_inline void
-perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
+static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
 {
-	if (static_key_false(&perf_swevent_enabled[event_id])) {
-		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
+	struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
 
-		perf_fetch_caller_regs(regs);
-		___perf_sw_event(event_id, nr, regs, addr);
-	}
+	perf_fetch_caller_regs(regs);
+	___perf_sw_event(event_id, nr, regs, addr);
 }
 
 extern struct static_key_false perf_sched_events;
 
-static __always_inline bool
-perf_sw_migrate_enabled(void)
+static __always_inline bool __perf_sw_enabled(int swevt)
 {
-	if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
-		return true;
-	return false;
+	return static_key_false(&perf_swevent_enabled[swevt]);
 }
 
 static inline void perf_event_task_migrate(struct task_struct *task)
@@ -1207,11 +1201,9 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
 	if (static_branch_unlikely(&perf_sched_events))
 		__perf_event_task_sched_in(prev, task);
 
-	if (perf_sw_migrate_enabled() && task->sched_migrated) {
-		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
-
-		perf_fetch_caller_regs(regs);
-		___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
+	if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) &&
+	    task->sched_migrated) {
+		__perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
 		task->sched_migrated = 0;
 	}
 }
@@ -1219,7 +1211,13 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
 static inline void perf_event_task_sched_out(struct task_struct *prev,
 					     struct task_struct *next)
 {
-	perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
+	if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
+		__perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
+
+	if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
+	    (task_css_check(prev, perf_event_cgrp_id, 1)->cgroup !=
+	     task_css_check(next, perf_event_cgrp_id, 1)->cgroup))
+		__perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
 
 	if (static_branch_unlikely(&perf_sched_events))
 		__perf_event_task_sched_out(prev, next);
@@ -1475,8 +1473,6 @@ static inline int perf_event_refresh(struct perf_event *event, int refresh)
 static inline void
 perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
 static inline void
-perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)			{ }
-static inline void
 perf_bp_event(struct perf_event *event, void *data)			{ }
 
 static inline int perf_register_guest_info_callbacks


* Re: [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
  2020-12-02 15:02 [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Namhyung Kim
  2020-12-02 15:02 ` [RFC 2/2] perf tools: Add 'cgroup-switches' software event Namhyung Kim
  2020-12-02 16:19 ` [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Peter Zijlstra
@ 2020-12-02 19:28 ` Andi Kleen
  2020-12-02 19:47   ` Stephane Eranian
  2020-12-03 12:20   ` Peter Zijlstra
  2 siblings, 2 replies; 14+ messages in thread
From: Andi Kleen @ 2020-12-02 19:28 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar, Peter Zijlstra,
	Mark Rutland, Alexander Shishkin, LKML, Stephane Eranian,
	Ian Rogers

> +	prev_cgrp = task_css_check(prev, perf_event_cgrp_id, 1)->cgroup;
> +	next_cgrp = task_css_check(next, perf_event_cgrp_id, 1)->cgroup;
> +
> +	if (prev_cgrp != next_cgrp)
> +		perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);

Seems to be the perf cgroup only, not all cgroups.
That's a big difference and needs to be documented properly.

It would probably make sense to have two events, one for all
cgroups and one for the perf cgroup only.



-Andi


* Re: [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
  2020-12-02 19:28 ` Andi Kleen
@ 2020-12-02 19:47   ` Stephane Eranian
  2020-12-02 22:42     ` Andi Kleen
  2020-12-03 12:20   ` Peter Zijlstra
  1 sibling, 1 reply; 14+ messages in thread
From: Stephane Eranian @ 2020-12-02 19:47 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Namhyung Kim, Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar,
	Peter Zijlstra, Mark Rutland, Alexander Shishkin, LKML,
	Ian Rogers

On Wed, Dec 2, 2020 at 11:28 AM Andi Kleen <ak@linux.intel.com> wrote:
>
> > +     prev_cgrp = task_css_check(prev, perf_event_cgrp_id, 1)->cgroup;
> > +     next_cgrp = task_css_check(next, perf_event_cgrp_id, 1)->cgroup;
> > +
> > +     if (prev_cgrp != next_cgrp)
> > +             perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
>
> Seems to be the perf cgroup only, not all cgroups.
> That's a big difference and needs to be documented properly.
>
We care about the all-cgroup case.

> It would probably make sense to have two events, one for all
> cgroups and one for the perf cgroup only.
>
>
>
> -Andi


* Re: [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
  2020-12-02 19:47   ` Stephane Eranian
@ 2020-12-02 22:42     ` Andi Kleen
  2020-12-02 23:40       ` Stephane Eranian
  0 siblings, 1 reply; 14+ messages in thread
From: Andi Kleen @ 2020-12-02 22:42 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Namhyung Kim, Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar,
	Peter Zijlstra, Mark Rutland, Alexander Shishkin, LKML,
	Ian Rogers

On Wed, Dec 02, 2020 at 11:47:25AM -0800, Stephane Eranian wrote:
> On Wed, Dec 2, 2020 at 11:28 AM Andi Kleen <ak@linux.intel.com> wrote:
> >
> > > +     prev_cgrp = task_css_check(prev, perf_event_cgrp_id, 1)->cgroup;
> > > +     next_cgrp = task_css_check(next, perf_event_cgrp_id, 1)->cgroup;
> > > +
> > > +     if (prev_cgrp != next_cgrp)
> > > +             perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
> >
> > Seems to be the perf cgroup only, not all cgroups.
> > That's a big difference and needs to be documented properly.
> >
> We care about the all-cgroup case.

Then it's not correct I think. You need a different hook point.

-Andi


* Re: [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
  2020-12-02 22:42     ` Andi Kleen
@ 2020-12-02 23:40       ` Stephane Eranian
  2020-12-03  1:03         ` Namhyung Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Stephane Eranian @ 2020-12-02 23:40 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Namhyung Kim, Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar,
	Peter Zijlstra, Mark Rutland, Alexander Shishkin, LKML,
	Ian Rogers

On Wed, Dec 2, 2020 at 2:42 PM Andi Kleen <ak@linux.intel.com> wrote:
>
> On Wed, Dec 02, 2020 at 11:47:25AM -0800, Stephane Eranian wrote:
> > On Wed, Dec 2, 2020 at 11:28 AM Andi Kleen <ak@linux.intel.com> wrote:
> > >
> > > > +     prev_cgrp = task_css_check(prev, perf_event_cgrp_id, 1)->cgroup;
> > > > +     next_cgrp = task_css_check(next, perf_event_cgrp_id, 1)->cgroup;
> > > > +
> > > > +     if (prev_cgrp != next_cgrp)
> > > > +             perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
> > >
> > > Seems to be the perf cgroup only, not all cgroups.
> > > That's a big difference and needs to be documented properly.
> > >
> > We care about the all-cgroup case.
>
> Then it's not correct I think. You need a different hook point.
>
I realize that ;-(


* Re: [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
  2020-12-02 23:40       ` Stephane Eranian
@ 2020-12-03  1:03         ` Namhyung Kim
  0 siblings, 0 replies; 14+ messages in thread
From: Namhyung Kim @ 2020-12-03  1:03 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Andi Kleen, Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar,
	Peter Zijlstra, Mark Rutland, Alexander Shishkin, LKML,
	Ian Rogers

Hi Stephane and Andi,

On Thu, Dec 3, 2020 at 8:40 AM Stephane Eranian <eranian@google.com> wrote:
>
> On Wed, Dec 2, 2020 at 2:42 PM Andi Kleen <ak@linux.intel.com> wrote:
> >
> > On Wed, Dec 02, 2020 at 11:47:25AM -0800, Stephane Eranian wrote:
> > > On Wed, Dec 2, 2020 at 11:28 AM Andi Kleen <ak@linux.intel.com> wrote:
> > > >
> > > > > +     prev_cgrp = task_css_check(prev, perf_event_cgrp_id, 1)->cgroup;
> > > > > +     next_cgrp = task_css_check(next, perf_event_cgrp_id, 1)->cgroup;
> > > > > +
> > > > > +     if (prev_cgrp != next_cgrp)
> > > > > +             perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
> > > >
> > > > Seems to be the perf cgroup only, not all cgroups.
> > > > That's a big difference and needs to be documented properly.
> > > >
> > > We care about the all-cgroup case.
> >
> > Then it's not correct I think. You need a different hook point.
> >
> I realize that ;-(

If we want to count any cgroup changes, I think we can compare
task->cgroups (css_set) here instead.
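
Maybe something like the following (completely untested sketch, the
helper name is made up):

	static inline bool any_cgroup_switched(struct task_struct *prev,
					       struct task_struct *next)
	{
		/*
		 * task->cgroups (the css_set) changes whenever the task
		 * moves in any cgroup hierarchy, not just perf_event.
		 * The sched-out path runs with preemption and IRQs
		 * disabled, so this should be covered on the RCU side.
		 */
		return rcu_dereference_sched(prev->cgroups) !=
		       rcu_dereference_sched(next->cgroups);
	}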

Thanks,
Namhyung


* Re: [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
  2020-12-02 16:19 ` [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Peter Zijlstra
@ 2020-12-03  1:05   ` Namhyung Kim
  2020-12-03  2:10   ` Namhyung Kim
  1 sibling, 0 replies; 14+ messages in thread
From: Namhyung Kim @ 2020-12-03  1:05 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar, Mark Rutland,
	Alexander Shishkin, LKML, Stephane Eranian, Ian Rogers,
	Andi Kleen

Hi Peter,

On Thu, Dec 3, 2020 at 1:19 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Dec 03, 2020 at 12:02:04AM +0900, Namhyung Kim wrote:
>
> > +#ifdef CONFIG_CGROUP_PERF
> > +static inline void
> > +perf_sw_event_cgroup_switch(struct task_struct *prev, struct task_struct *next)
> > +{
> > +     struct cgroup *prev_cgrp, *next_cgrp;
> > +
> > +     rcu_read_lock();
> > +
> > +     prev_cgrp = task_css_check(prev, perf_event_cgrp_id, 1)->cgroup;
> > +     next_cgrp = task_css_check(next, perf_event_cgrp_id, 1)->cgroup;
> > +
> > +     if (prev_cgrp != next_cgrp)
> > +             perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
> > +
> > +     rcu_read_unlock();
> > +}
> > +#else
> > +static inline void perf_sw_event_cgroup_switch(struct task_struct *prev,
> > +                                            struct task_struct *next) {}
> > +#endif  /* CONFIG_CGROUP_PERF */
> > +
> >  extern struct static_key_false perf_sched_events;
> >
> >  static __always_inline bool
> > @@ -1220,6 +1241,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
> >                                            struct task_struct *next)
> >  {
> >       perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
> > +     perf_sw_event_cgroup_switch(prev, next);
> >
> >       if (static_branch_unlikely(&perf_sched_events))
> >               __perf_event_task_sched_out(prev, next);
>
> Urgh.. that's horrible, try something like this.
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 9a38f579bc76..5eb284819ee5 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1174,25 +1174,19 @@ DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
>   * which is guaranteed by us not actually scheduling inside other swevents
>   * because those disable preemption.
>   */
> -static __always_inline void
> -perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
> +static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
>  {
> -       if (static_key_false(&perf_swevent_enabled[event_id])) {
> -               struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
> +       struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
>
> -               perf_fetch_caller_regs(regs);
> -               ___perf_sw_event(event_id, nr, regs, addr);
> -       }
> +       perf_fetch_caller_regs(regs);
> +       ___perf_sw_event(event_id, nr, regs, addr);
>  }
>
>  extern struct static_key_false perf_sched_events;
>
> -static __always_inline bool
> -perf_sw_migrate_enabled(void)
> +static __always_inline bool __perf_sw_enabled(int swevt)
>  {
> -       if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
> -               return true;
> -       return false;
> +       return static_key_false(&perf_swevent_enabled[swevt]);
>  }
>
>  static inline void perf_event_task_migrate(struct task_struct *task)
> @@ -1207,11 +1201,9 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
>         if (static_branch_unlikely(&perf_sched_events))
>                 __perf_event_task_sched_in(prev, task);
>
> -       if (perf_sw_migrate_enabled() && task->sched_migrated) {
> -               struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
> -
> -               perf_fetch_caller_regs(regs);
> -               ___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
> +       if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) &&
> +           task->sched_migrated) {
> +               __perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
>                 task->sched_migrated = 0;
>         }
>  }
> @@ -1219,7 +1211,13 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
>  static inline void perf_event_task_sched_out(struct task_struct *prev,
>                                              struct task_struct *next)
>  {
> -       perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
> +       if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
> +               __perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
> +
> +       if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
> +           (task_css_check(prev, perf_event_cgrp_id, 1)->cgroup !=
> +            task_css_check(next, perf_event_cgrp_id, 1)->cgroup))
> +               __perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);

Right, it should do the check only when the event is enabled.

Thanks,
Namhyung


>
>         if (static_branch_unlikely(&perf_sched_events))
>                 __perf_event_task_sched_out(prev, next);
> @@ -1475,8 +1473,6 @@ static inline int perf_event_refresh(struct perf_event *event, int refresh)
>  static inline void
>  perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)    { }
>  static inline void
> -perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)                    { }
> -static inline void
>  perf_bp_event(struct perf_event *event, void *data)                    { }
>
>  static inline int perf_register_guest_info_callbacks


* Re: [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
  2020-12-02 16:19 ` [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Peter Zijlstra
  2020-12-03  1:05   ` Namhyung Kim
@ 2020-12-03  2:10   ` Namhyung Kim
  2020-12-03  7:45     ` Peter Zijlstra
  1 sibling, 1 reply; 14+ messages in thread
From: Namhyung Kim @ 2020-12-03  2:10 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar, Mark Rutland,
	Alexander Shishkin, LKML, Stephane Eranian, Ian Rogers,
	Andi Kleen

On Thu, Dec 3, 2020 at 1:19 AM Peter Zijlstra <peterz@infradead.org> wrote:

> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 9a38f579bc76..5eb284819ee5 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1174,25 +1174,19 @@ DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
>   * which is guaranteed by us not actually scheduling inside other swevents
>   * because those disable preemption.
>   */
> -static __always_inline void
> -perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
> +static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)

It'd be nice to avoid the __ prefix if possible.


>  {
> -       if (static_key_false(&perf_swevent_enabled[event_id])) {
> -               struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
> +       struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
>
> -               perf_fetch_caller_regs(regs);
> -               ___perf_sw_event(event_id, nr, regs, addr);
> -       }
> +       perf_fetch_caller_regs(regs);
> +       ___perf_sw_event(event_id, nr, regs, addr);
>  }
>
>  extern struct static_key_false perf_sched_events;
>
> -static __always_inline bool
> -perf_sw_migrate_enabled(void)
> +static __always_inline bool __perf_sw_enabled(int swevt)

Ditto.


>  {
> -       if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
> -               return true;
> -       return false;
> +       return static_key_false(&perf_swevent_enabled[swevt]);
>  }
>
>  static inline void perf_event_task_migrate(struct task_struct *task)
> @@ -1207,11 +1201,9 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
>         if (static_branch_unlikely(&perf_sched_events))
>                 __perf_event_task_sched_in(prev, task);
>
> -       if (perf_sw_migrate_enabled() && task->sched_migrated) {
> -               struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
> -
> -               perf_fetch_caller_regs(regs);
> -               ___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
> +       if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) &&
> +           task->sched_migrated) {

It seems task->sched_migrated is set only if the event is enabled,
so can we just check that value here?


> +               __perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
>                 task->sched_migrated = 0;
>         }
>  }
> @@ -1219,7 +1211,13 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
>  static inline void perf_event_task_sched_out(struct task_struct *prev,
>                                              struct task_struct *next)
>  {
> -       perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
> +       if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
> +               __perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
> +
> +       if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
> +           (task_css_check(prev, perf_event_cgrp_id, 1)->cgroup !=
> +            task_css_check(next, perf_event_cgrp_id, 1)->cgroup))
> +               __perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);

I was not clear about the RCU protection here.  Is it ok to access
the task's css_set directly?

Thanks,
Namhyung

>
>         if (static_branch_unlikely(&perf_sched_events))
>                 __perf_event_task_sched_out(prev, next);
> @@ -1475,8 +1473,6 @@ static inline int perf_event_refresh(struct perf_event *event, int refresh)
>  static inline void
>  perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)    { }
>  static inline void
> -perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)                    { }
> -static inline void
>  perf_bp_event(struct perf_event *event, void *data)                    { }
>
>  static inline int perf_register_guest_info_callbacks


* Re: [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
  2020-12-03  2:10   ` Namhyung Kim
@ 2020-12-03  7:45     ` Peter Zijlstra
  2020-12-04  7:25       ` Namhyung Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2020-12-03  7:45 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar, Mark Rutland,
	Alexander Shishkin, LKML, Stephane Eranian, Ian Rogers,
	Andi Kleen

On Thu, Dec 03, 2020 at 11:10:30AM +0900, Namhyung Kim wrote:
> On Thu, Dec 3, 2020 at 1:19 AM Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> > index 9a38f579bc76..5eb284819ee5 100644
> > --- a/include/linux/perf_event.h
> > +++ b/include/linux/perf_event.h
> > @@ -1174,25 +1174,19 @@ DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
> >   * which is guaranteed by us not actually scheduling inside other swevents
> >   * because those disable preemption.
> >   */
> > -static __always_inline void
> > -perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
> > +static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
> 
> It'd be nice to avoid the __ prefix if possible.

Not having __ would seem to suggest it's a function of generic utility.
Still, *shrug* ;-)

> >  {
> > -       if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
> > -               return true;
> > -       return false;
> > +       return static_key_false(&perf_swevent_enabled[swevt]);
> >  }
> >
> >  static inline void perf_event_task_migrate(struct task_struct *task)
> > @@ -1207,11 +1201,9 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
> >         if (static_branch_unlikely(&perf_sched_events))
> >                 __perf_event_task_sched_in(prev, task);
> >
> > -       if (perf_sw_migrate_enabled() && task->sched_migrated) {
> > -               struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
> > -
> > -               perf_fetch_caller_regs(regs);
> > -               ___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
> > +       if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) &&
> > +           task->sched_migrated) {
> 
> > It seems task->sched_migrated is set only if the event is enabled,
> > so can we just check that value here?

Why suffer the unconditional load and test? Your L1 too big?

> > +               __perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
> >                 task->sched_migrated = 0;
> >         }
> >  }
> > @@ -1219,7 +1211,13 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
> >  static inline void perf_event_task_sched_out(struct task_struct *prev,
> >                                              struct task_struct *next)
> >  {
> > -       perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
> > +       if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
> > +               __perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
> > +
> > +       if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
> > +           (task_css_check(prev, perf_event_cgrp_id, 1)->cgroup !=
> > +            task_css_check(next, perf_event_cgrp_id, 1)->cgroup))
> > +               __perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
> 
> I was not clear about the RCU protection here.  Is it ok to access
> the task's css_set directly?

We're here with preemption and IRQs disabled, good luck trying to get
RCU to consider that not a critical section and spirit things away under
us.


* Re: [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
  2020-12-02 19:28 ` Andi Kleen
  2020-12-02 19:47   ` Stephane Eranian
@ 2020-12-03 12:20   ` Peter Zijlstra
  2020-12-04  7:27     ` Namhyung Kim
  1 sibling, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2020-12-03 12:20 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Namhyung Kim, Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar,
	Mark Rutland, Alexander Shishkin, LKML, Stephane Eranian,
	Ian Rogers

On Wed, Dec 02, 2020 at 11:28:28AM -0800, Andi Kleen wrote:
> > +	prev_cgrp = task_css_check(prev, perf_event_cgrp_id, 1)->cgroup;
> > +	next_cgrp = task_css_check(next, perf_event_cgrp_id, 1)->cgroup;
> > +
> > +	if (prev_cgrp != next_cgrp)
> > +		perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
> 
> Seems to be the perf cgroup only, not all cgroups.
> That's a big difference and needs to be documented properly.

With cgroup-v2 that's all the same, no?


* Re: [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
  2020-12-03  7:45     ` Peter Zijlstra
@ 2020-12-04  7:25       ` Namhyung Kim
  0 siblings, 0 replies; 14+ messages in thread
From: Namhyung Kim @ 2020-12-04  7:25 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar, Mark Rutland,
	Alexander Shishkin, LKML, Stephane Eranian, Ian Rogers,
	Andi Kleen

On Thu, Dec 3, 2020 at 4:45 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Dec 03, 2020 at 11:10:30AM +0900, Namhyung Kim wrote:
> > On Thu, Dec 3, 2020 at 1:19 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> > > index 9a38f579bc76..5eb284819ee5 100644
> > > --- a/include/linux/perf_event.h
> > > +++ b/include/linux/perf_event.h
> > > @@ -1174,25 +1174,19 @@ DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
> > >   * which is guaranteed by us not actually scheduling inside other swevents
> > >   * because those disable preemption.
> > >   */
> > > -static __always_inline void
> > > -perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
> > > +static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
> >
> > It'd be nice to avoid the __ prefix if possible.
>
> Not having __ would seem to suggest it's a function of generic utility.
> Still, *shrug* ;-)

Ok, noted.

>
> > >  {
> > > -       if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
> > > -               return true;
> > > -       return false;
> > > +       return static_key_false(&perf_swevent_enabled[swevt]);
> > >  }
> > >
> > >  static inline void perf_event_task_migrate(struct task_struct *task)
> > > @@ -1207,11 +1201,9 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
> > >         if (static_branch_unlikely(&perf_sched_events))
> > >                 __perf_event_task_sched_in(prev, task);
> > >
> > > -       if (perf_sw_migrate_enabled() && task->sched_migrated) {
> > > -               struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
> > > -
> > > -               perf_fetch_caller_regs(regs);
> > > -               ___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
> > > +       if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) &&
> > > +           task->sched_migrated) {
> >
> > It seems task->sched_migrated is set only if the event is enabled,
> > so can we just check that value here?
>
> Why suffer the unconditional load and test? Your L1 too big?

I just wanted to avoid typing long lines.. ;-p

>
> > > +               __perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
> > >                 task->sched_migrated = 0;
> > >         }
> > >  }
> > > @@ -1219,7 +1211,13 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
> > >  static inline void perf_event_task_sched_out(struct task_struct *prev,
> > >                                              struct task_struct *next)
> > >  {
> > > -       perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
> > > +       if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
> > > +               __perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
> > > +
> > > +       if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
> > > +           (task_css_check(prev, perf_event_cgrp_id, 1)->cgroup !=
> > > +            task_css_check(next, perf_event_cgrp_id, 1)->cgroup))
> > > +               __perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
> >
> > I was not clear about the RCU protection here.  Is it ok to access
> > the task's css_set directly?
>
> We're here with preemption and IRQs disabled, good luck trying to get
> RCU to consider that not a critical section and spirit things away under
> us.

OK, someday I'll go read the RCU code.. :)

Thanks,
Namhyung


* Re: [RFC 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
  2020-12-03 12:20   ` Peter Zijlstra
@ 2020-12-04  7:27     ` Namhyung Kim
  0 siblings, 0 replies; 14+ messages in thread
From: Namhyung Kim @ 2020-12-04  7:27 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar,
	Mark Rutland, Alexander Shishkin, LKML, Stephane Eranian,
	Ian Rogers

On Thu, Dec 3, 2020 at 9:20 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Wed, Dec 02, 2020 at 11:28:28AM -0800, Andi Kleen wrote:
> > > +   prev_cgrp = task_css_check(prev, perf_event_cgrp_id, 1)->cgroup;
> > > +   next_cgrp = task_css_check(next, perf_event_cgrp_id, 1)->cgroup;
> > > +
> > > +   if (prev_cgrp != next_cgrp)
> > > +           perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
> >
> > Seems to be the perf cgroup only, not all cgroups.
> > That's a big difference and needs to be documented properly.
>
> With cgroup-v2 that's all the same, no?

Right, but unfortunately it seems cgroup v1 is still widely used.

Thanks,
Namhyung

