* [PATCH 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
@ 2021-01-14  8:50 Namhyung Kim
  2021-01-14  8:50 ` [PATCH 2/2] perf tools: Add 'cgroup-switches' software event Namhyung Kim
  2021-01-21  8:00 ` [PATCH 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Namhyung Kim
From: Namhyung Kim @ 2021-01-14  8:50 UTC
  To: Arnaldo Carvalho de Melo, Jiri Olsa, Peter Zijlstra
  Cc: Ingo Molnar, Mark Rutland, Alexander Shishkin, LKML,
	Stephane Eranian, Andi Kleen, Ian Rogers

This patch adds a new software event to count context switches that
also switch cgroups, i.e. it is counted only when the previous and
next tasks belong to different cgroups.  Note that it only compares
the cgroups of the perf_event subsystem.  For cgroup v2, where all
controllers share a single hierarchy, this makes no difference
anyway.

One could arguably do the same with the existing sched_switch
tracepoint and eBPF, but some systems may not have eBPF available
for one reason or another, so this adds a simple alternative.
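
Not part of the patch itself, but for illustration: a minimal sketch
of reading the new counter from user space via the usual
perf_event_open(2) pattern, assuming a kernel and uapi headers that
carry this patch, and sufficient perf privileges (error handling
trimmed):

  #include <linux/perf_event.h>
  #include <sys/ioctl.h>
  #include <sys/syscall.h>
  #include <unistd.h>
  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
      struct perf_event_attr attr;
      long long count = 0;
      int fd;

      memset(&attr, 0, sizeof(attr));
      attr.size = sizeof(attr);
      attr.type = PERF_TYPE_SOFTWARE;
      attr.config = PERF_COUNT_SW_CGROUP_SWITCHES;
      attr.disabled = 1;

      /* all tasks on CPU 0; pid == -1 needs CAP_PERFMON or a
       * permissive perf_event_paranoid setting */
      fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
      if (fd < 0)
          return 1;

      ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
      sleep(1);
      ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
      read(fd, &count, sizeof(count));
      printf("cgroup-switches: %lld\n", count);
      close(fd);
      return 0;
  }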

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 include/linux/perf_event.h      | 38 +++++++++++++++------------------
 include/uapi/linux/perf_event.h |  1 +
 2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9a38f579bc76..304ef42d42d1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1174,30 +1174,24 @@ DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
  * which is guaranteed by us not actually scheduling inside other swevents
  * because those disable preemption.
  */
-static __always_inline void
-perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
+static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
 {
-	if (static_key_false(&perf_swevent_enabled[event_id])) {
-		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
+	struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
 
-		perf_fetch_caller_regs(regs);
-		___perf_sw_event(event_id, nr, regs, addr);
-	}
+	perf_fetch_caller_regs(regs);
+	___perf_sw_event(event_id, nr, regs, addr);
 }
 
 extern struct static_key_false perf_sched_events;
 
-static __always_inline bool
-perf_sw_migrate_enabled(void)
+static __always_inline bool __perf_sw_enabled(int swevt)
 {
-	if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
-		return true;
-	return false;
+	return static_key_false(&perf_swevent_enabled[swevt]);
 }
 
 static inline void perf_event_task_migrate(struct task_struct *task)
 {
-	if (perf_sw_migrate_enabled())
+	if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS))
 		task->sched_migrated = 1;
 }
 
@@ -1207,11 +1201,9 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
 	if (static_branch_unlikely(&perf_sched_events))
 		__perf_event_task_sched_in(prev, task);
 
-	if (perf_sw_migrate_enabled() && task->sched_migrated) {
-		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
-
-		perf_fetch_caller_regs(regs);
-		___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
+	if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) &&
+	    task->sched_migrated) {
+		__perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
 		task->sched_migrated = 0;
 	}
 }
@@ -1219,7 +1211,13 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
 static inline void perf_event_task_sched_out(struct task_struct *prev,
 					     struct task_struct *next)
 {
-	perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
+	if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
+		__perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
+
+	if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
+	    (task_css_check(prev, perf_event_cgrp_id, 1)->cgroup !=
+	     task_css_check(next, perf_event_cgrp_id, 1)->cgroup))
+		__perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
 
 	if (static_branch_unlikely(&perf_sched_events))
 		__perf_event_task_sched_out(prev, next);
@@ -1475,8 +1473,6 @@ static inline int perf_event_refresh(struct perf_event *event, int refresh)
 static inline void
 perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
 static inline void
-perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)			{ }
-static inline void
 perf_bp_event(struct perf_event *event, void *data)			{ }
 
 static inline int perf_register_guest_info_callbacks
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b15e3447cd9f..16b9538ad89b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -112,6 +112,7 @@ enum perf_sw_ids {
 	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 	PERF_COUNT_SW_DUMMY			= 9,
 	PERF_COUNT_SW_BPF_OUTPUT		= 10,
+	PERF_COUNT_SW_CGROUP_SWITCHES		= 11,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
-- 
2.30.0.284.gd98b1dd5eaa7-goog



* [PATCH 2/2] perf tools: Add 'cgroup-switches' software event
  2021-01-14  8:50 [PATCH 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Namhyung Kim
@ 2021-01-14  8:50 ` Namhyung Kim
  2021-01-21  8:00 ` [PATCH 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Namhyung Kim
From: Namhyung Kim @ 2021-01-14  8:50 UTC
  To: Arnaldo Carvalho de Melo, Jiri Olsa, Peter Zijlstra
  Cc: Ingo Molnar, Mark Rutland, Alexander Shishkin, LKML,
	Stephane Eranian, Andi Kleen, Ian Rogers

It counts how often the cgroup actually changes during a context
switch.  Switches between two tasks in the same cgroup increment
context-switches but not cgroup-switches, which is why the second
count below is slightly lower.

  # perf stat -a -e context-switches,cgroup-switches sleep 1

   Performance counter stats for 'system wide':

              11,267      context-switches
              10,950      cgroup-switches

         1.015634369 seconds time elapsed
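
Once both patches are applied, the event also shows up among the
software events in 'perf list', and it can be combined with the
other scheduler-related software events (context-switches,
cpu-migrations) in a single 'perf stat' invocation like any other
software event.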

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/include/uapi/linux/perf_event.h | 1 +
 tools/perf/util/parse-events.c        | 4 ++++
 tools/perf/util/parse-events.l        | 1 +
 3 files changed, 6 insertions(+)

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index b95d3c485d27..16559703c49c 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -112,6 +112,7 @@ enum perf_sw_ids {
 	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 	PERF_COUNT_SW_DUMMY			= 9,
 	PERF_COUNT_SW_BPF_OUTPUT		= 10,
+	PERF_COUNT_SW_CGROUP_SWITCHES		= 11,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 3b273580fb84..f6a5a099e143 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -145,6 +145,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
 		.symbol = "bpf-output",
 		.alias  = "",
 	},
+	[PERF_COUNT_SW_CGROUP_SWITCHES] = {
+		.symbol = "cgroup-switches",
+		.alias  = "",
+	},
 };
 
 #define __PERF_EVENT_FIELD(config, name) \
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 9db5097317f4..88f203bb6fab 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -347,6 +347,7 @@ emulation-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EM
 dummy						{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
 duration_time					{ return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
 bpf-output					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
+cgroup-switches					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
 
 	/*
 	 * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.
-- 
2.30.0.284.gd98b1dd5eaa7-goog



* Re: [PATCH 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event
  2021-01-14  8:50 [PATCH 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event Namhyung Kim
  2021-01-14  8:50 ` [PATCH 2/2] perf tools: Add 'cgroup-switches' software event Namhyung Kim
@ 2021-01-21  8:00 ` Namhyung Kim
From: Namhyung Kim @ 2021-01-21  8:00 UTC
  To: Peter Zijlstra
  Cc: Arnaldo Carvalho de Melo, Jiri Olsa, Ingo Molnar, Mark Rutland,
	Alexander Shishkin, LKML, Stephane Eranian, Andi Kleen,
	Ian Rogers

Hi Peter,

Can you please take a look at this again?

Thanks,
Namhyung

On Thu, Jan 14, 2021 at 5:50 PM Namhyung Kim <namhyung@kernel.org> wrote:
> [...]

