* [PATCH 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
@ 2021-01-14 8:50 Namhyung Kim
2021-01-14 8:50 ` [PATCH 2/2] perf tools: Add 'cgroup-switches' software event Namhyung Kim
2021-01-21 8:00 ` [PATCH 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Namhyung Kim
0 siblings, 2 replies; 3+ messages in thread
From: Namhyung Kim @ 2021-01-14 8:50 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Jiri Olsa, Peter Zijlstra
Cc: Ingo Molnar, Mark Rutland, Alexander Shishkin, LKML,
Stephane Eranian, Andi Kleen, Ian Rogers
This patch adds a new software event that counts context switches
which also switch cgroups. It is counted only if the cgroups of the
previous and next tasks are different. Note that it only checks the
cgroups in the perf_event subsystem. For cgroup v2, it shouldn't
matter anyway.
One can argue that we can do this by using the existing sched_switch
event with eBPF. But some systems might not have eBPF for some reason,
so I'd like to add this as a simple alternative.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
include/linux/perf_event.h | 38 +++++++++++++++------------------
include/uapi/linux/perf_event.h | 1 +
2 files changed, 18 insertions(+), 21 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9a38f579bc76..304ef42d42d1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1174,30 +1174,24 @@ DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
* which is guaranteed by us not actually scheduling inside other swevents
* because those disable preemption.
*/
-static __always_inline void
-perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
+static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
{
- if (static_key_false(&perf_swevent_enabled[event_id])) {
- struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
+ struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
- perf_fetch_caller_regs(regs);
- ___perf_sw_event(event_id, nr, regs, addr);
- }
+ perf_fetch_caller_regs(regs);
+ ___perf_sw_event(event_id, nr, regs, addr);
}
extern struct static_key_false perf_sched_events;
-static __always_inline bool
-perf_sw_migrate_enabled(void)
+static __always_inline bool __perf_sw_enabled(int swevt)
{
- if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
- return true;
- return false;
+ return static_key_false(&perf_swevent_enabled[swevt]);
}
static inline void perf_event_task_migrate(struct task_struct *task)
{
- if (perf_sw_migrate_enabled())
+ if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS))
task->sched_migrated = 1;
}
@@ -1207,11 +1201,9 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
if (static_branch_unlikely(&perf_sched_events))
__perf_event_task_sched_in(prev, task);
- if (perf_sw_migrate_enabled() && task->sched_migrated) {
- struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
-
- perf_fetch_caller_regs(regs);
- ___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
+ if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) &&
+ task->sched_migrated) {
+ __perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
task->sched_migrated = 0;
}
}
@@ -1219,7 +1211,13 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
static inline void perf_event_task_sched_out(struct task_struct *prev,
struct task_struct *next)
{
- perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
+ if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
+ __perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
+
+ if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
+ (task_css_check(prev, perf_event_cgrp_id, 1)->cgroup !=
+ task_css_check(next, perf_event_cgrp_id, 1)->cgroup))
+ __perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
if (static_branch_unlikely(&perf_sched_events))
__perf_event_task_sched_out(prev, next);
@@ -1475,8 +1473,6 @@ static inline int perf_event_refresh(struct perf_event *event, int refresh)
static inline void
perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { }
static inline void
-perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) { }
-static inline void
perf_bp_event(struct perf_event *event, void *data) { }
static inline int perf_register_guest_info_callbacks
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b15e3447cd9f..16b9538ad89b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -112,6 +112,7 @@ enum perf_sw_ids {
PERF_COUNT_SW_EMULATION_FAULTS = 8,
PERF_COUNT_SW_DUMMY = 9,
PERF_COUNT_SW_BPF_OUTPUT = 10,
+ PERF_COUNT_SW_CGROUP_SWITCHES = 11,
PERF_COUNT_SW_MAX, /* non-ABI */
};
--
2.30.0.284.gd98b1dd5eaa7-goog
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 2/2] perf tools: Add 'cgroup-switches' software event
2021-01-14 8:50 [PATCH 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Namhyung Kim
@ 2021-01-14 8:50 ` Namhyung Kim
2021-01-21 8:00 ` [PATCH 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Namhyung Kim
1 sibling, 0 replies; 3+ messages in thread
From: Namhyung Kim @ 2021-01-14 8:50 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Jiri Olsa, Peter Zijlstra
Cc: Ingo Molnar, Mark Rutland, Alexander Shishkin, LKML,
Stephane Eranian, Andi Kleen, Ian Rogers
It counts how often the cgroup actually changes during context
switches.
# perf stat -a -e context-switches,cgroup-switches sleep 1
Performance counter stats for 'system wide':
11,267 context-switches
10,950 cgroup-switches
1.015634369 seconds time elapsed
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/include/uapi/linux/perf_event.h | 1 +
tools/perf/util/parse-events.c | 4 ++++
tools/perf/util/parse-events.l | 1 +
3 files changed, 6 insertions(+)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index b95d3c485d27..16559703c49c 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -112,6 +112,7 @@ enum perf_sw_ids {
PERF_COUNT_SW_EMULATION_FAULTS = 8,
PERF_COUNT_SW_DUMMY = 9,
PERF_COUNT_SW_BPF_OUTPUT = 10,
+ PERF_COUNT_SW_CGROUP_SWITCHES = 11,
PERF_COUNT_SW_MAX, /* non-ABI */
};
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 3b273580fb84..f6a5a099e143 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -145,6 +145,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
.symbol = "bpf-output",
.alias = "",
},
+ [PERF_COUNT_SW_CGROUP_SWITCHES] = {
+ .symbol = "cgroup-switches",
+ .alias = "",
+ },
};
#define __PERF_EVENT_FIELD(config, name) \
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 9db5097317f4..88f203bb6fab 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -347,6 +347,7 @@ emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EM
dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
+cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
/*
* We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.
--
2.30.0.284.gd98b1dd5eaa7-goog
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
2021-01-14 8:50 [PATCH 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Namhyung Kim
2021-01-14 8:50 ` [PATCH 2/2] perf tools: Add 'cgroup-switches' software event Namhyung Kim
@ 2021-01-21 8:00 ` Namhyung Kim
1 sibling, 0 replies; 3+ messages in thread
From: Namhyung Kim @ 2021-01-21 8:00 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar, Mark Rutland,
Alexander Shishkin, LKML, Stephane Eranian, Andi Kleen,
Ian Rogers
Hi Peter,
Can you please take a look at this again?
Thanks,
Namhyung
On Thu, Jan 14, 2021 at 5:50 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> This patch adds a new software event to count context switches
> involving cgroup switches. So it's counted only if cgroups of
> previous and next tasks are different. Note that it only checks the
> cgroups in the perf_event subsystem. For cgroup v2, it shouldn't
> matter anyway.
>
> One can argue that we can do this by using existing sched_switch event
> with eBPF. But some systems might not have eBPF for some reason so
> I'd like to add this as a simple way.
>
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
> include/linux/perf_event.h | 38 +++++++++++++++------------------
> include/uapi/linux/perf_event.h | 1 +
> 2 files changed, 18 insertions(+), 21 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 9a38f579bc76..304ef42d42d1 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1174,30 +1174,24 @@ DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
> * which is guaranteed by us not actually scheduling inside other swevents
> * because those disable preemption.
> */
> -static __always_inline void
> -perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
> +static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
> {
> - if (static_key_false(&perf_swevent_enabled[event_id])) {
> - struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
> + struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
>
> - perf_fetch_caller_regs(regs);
> - ___perf_sw_event(event_id, nr, regs, addr);
> - }
> + perf_fetch_caller_regs(regs);
> + ___perf_sw_event(event_id, nr, regs, addr);
> }
>
> extern struct static_key_false perf_sched_events;
>
> -static __always_inline bool
> -perf_sw_migrate_enabled(void)
> +static __always_inline bool __perf_sw_enabled(int swevt)
> {
> - if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
> - return true;
> - return false;
> + return static_key_false(&perf_swevent_enabled[swevt]);
> }
>
> static inline void perf_event_task_migrate(struct task_struct *task)
> {
> - if (perf_sw_migrate_enabled())
> + if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS))
> task->sched_migrated = 1;
> }
>
> @@ -1207,11 +1201,9 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
> if (static_branch_unlikely(&perf_sched_events))
> __perf_event_task_sched_in(prev, task);
>
> - if (perf_sw_migrate_enabled() && task->sched_migrated) {
> - struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
> -
> - perf_fetch_caller_regs(regs);
> - ___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
> + if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) &&
> + task->sched_migrated) {
> + __perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
> task->sched_migrated = 0;
> }
> }
> @@ -1219,7 +1211,13 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
> static inline void perf_event_task_sched_out(struct task_struct *prev,
> struct task_struct *next)
> {
> - perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
> + if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
> + __perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
> +
> + if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
> + (task_css_check(prev, perf_event_cgrp_id, 1)->cgroup !=
> + task_css_check(next, perf_event_cgrp_id, 1)->cgroup))
> + __perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
>
> if (static_branch_unlikely(&perf_sched_events))
> __perf_event_task_sched_out(prev, next);
> @@ -1475,8 +1473,6 @@ static inline int perf_event_refresh(struct perf_event *event, int refresh)
> static inline void
> perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { }
> static inline void
> -perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) { }
> -static inline void
> perf_bp_event(struct perf_event *event, void *data) { }
>
> static inline int perf_register_guest_info_callbacks
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index b15e3447cd9f..16b9538ad89b 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -112,6 +112,7 @@ enum perf_sw_ids {
> PERF_COUNT_SW_EMULATION_FAULTS = 8,
> PERF_COUNT_SW_DUMMY = 9,
> PERF_COUNT_SW_BPF_OUTPUT = 10,
> + PERF_COUNT_SW_CGROUP_SWITCHES = 11,
>
> PERF_COUNT_SW_MAX, /* non-ABI */
> };
> --
> 2.30.0.284.gd98b1dd5eaa7-goog
>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2021-01-21 8:06 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-14 8:50 [PATCH 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Namhyung Kim
2021-01-14 8:50 ` [PATCH 2/2] perf tools: Add 'cgroup-switches' software event Namhyung Kim
2021-01-21 8:00 ` [PATCH 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Namhyung Kim
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.