All of lore.kernel.org
 help / color / mirror / Atom feed
From: Namhyung Kim <namhyung@kernel.org>
To: Ingo Molnar <mingo@kernel.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Stephane Eranian <eranian@google.com>,
	LKML <linux-kernel@vger.kernel.org>,
	linux-perf-users@vger.kernel.org, Tejun Heo <tj@kernel.org>,
	Li Zefan <lizefan@huawei.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Adrian Hunter <adrian.hunter@intel.com>
Subject: [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event
Date: Mon, 23 Dec 2019 15:07:51 +0900	[thread overview]
Message-ID: <20191223060759.841176-2-namhyung@kernel.org> (raw)
In-Reply-To: <20191223060759.841176-1-namhyung@kernel.org>

To support cgroup tracking, add CGROUP event to save a link between
cgroup path and id number.  This is needed since cgroups can go away
when userspace tries to read the cgroup info (from the id) later.
The attr.cgroup bit was also added to enable cgroup tracking from
userspace.

This event will be generated when a new cgroup becomes active.
Userspace might need to synthesize those events for existing cgroups.

Cc: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 include/uapi/linux/perf_event.h |  13 +++-
 kernel/events/core.c            | 111 ++++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 377d794d3105..de2ab87ca92c 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -377,7 +377,8 @@ struct perf_event_attr {
 				ksymbol        :  1, /* include ksymbol events */
 				bpf_event      :  1, /* include bpf events */
 				aux_output     :  1, /* generate AUX records instead of events */
-				__reserved_1   : 32;
+				cgroup         :  1, /* include cgroup events */
+				__reserved_1   : 31;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -1006,6 +1007,16 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_BPF_EVENT			= 18,
 
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				id;
+	 *	char				path[];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_CGROUP			= 19,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4ff86d57f9e5..b0aa1b921769 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -386,6 +386,7 @@ static atomic_t nr_freq_events __read_mostly;
 static atomic_t nr_switch_events __read_mostly;
 static atomic_t nr_ksymbol_events __read_mostly;
 static atomic_t nr_bpf_events __read_mostly;
+static atomic_t nr_cgroup_events __read_mostly;
 
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
@@ -4455,6 +4456,8 @@ static void unaccount_event(struct perf_event *event)
 		atomic_dec(&nr_comm_events);
 	if (event->attr.namespaces)
 		atomic_dec(&nr_namespaces_events);
+	if (event->attr.cgroup)
+		atomic_dec(&nr_cgroup_events);
 	if (event->attr.task)
 		atomic_dec(&nr_task_events);
 	if (event->attr.freq)
@@ -7564,6 +7567,105 @@ void perf_event_namespaces(struct task_struct *task)
 			NULL);
 }
 
+/*
+ * cgroup tracking
+ */
+#ifdef CONFIG_CGROUPS
+
+struct perf_cgroup_event {
+	char				*path;
+	int				path_size;
+	struct {
+		struct perf_event_header	header;
+		u64				id;
+		char				path[];
+	} event_id;
+};
+
+static int perf_event_cgroup_match(struct perf_event *event)
+{
+	return event->attr.cgroup;
+}
+
+static void perf_event_cgroup_output(struct perf_event *event, void *data)
+{
+	struct perf_cgroup_event *cgroup_event = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	u16 header_size = cgroup_event->event_id.header.size;
+	int ret;
+
+	if (!perf_event_cgroup_match(event))
+		return;
+
+	perf_event_header__init_id(&cgroup_event->event_id.header,
+				   &sample, event);
+	ret = perf_output_begin(&handle, event,
+				cgroup_event->event_id.header.size);
+	if (ret)
+		goto out;
+
+	perf_output_put(&handle, cgroup_event->event_id);
+	__output_copy(&handle, cgroup_event->path, cgroup_event->path_size);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+out:
+	cgroup_event->event_id.header.size = header_size;
+}
+
+void perf_event_cgroup(struct cgroup *cgrp)
+{
+	struct perf_cgroup_event cgroup_event;
+	char path_enomem[16] = "//enomem";
+	char *pathname;
+	size_t size;
+
+	if (!atomic_read(&nr_cgroup_events))
+		return;
+
+	cgroup_event = (struct perf_cgroup_event){
+		.event_id  = {
+			.header = {
+				.type = PERF_RECORD_CGROUP,
+				.misc = 0,
+				.size = sizeof(cgroup_event.event_id),
+			},
+			.id = cgroup_id(cgrp),
+		},
+	};
+
+	pathname = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (pathname == NULL) {
+		cgroup_event.path = path_enomem;
+	} else {
+		/* just to be sure to have enough space for alignment */
+		cgroup_path(cgrp, pathname, PATH_MAX - sizeof(u64));
+		cgroup_event.path = pathname;
+	}
+
+	/*
+	 * Since our buffer works in 8 byte units we need to align our string
+	 * size to a multiple of 8. However, we must guarantee the tail end is
+	 * zero'd out to avoid leaking random bits to userspace.
+	 */
+	size = strlen(cgroup_event.path) + 1;
+	while (!IS_ALIGNED(size, sizeof(u64)))
+		cgroup_event.path[size++] = '\0';
+
+	cgroup_event.event_id.header.size += size;
+	cgroup_event.path_size = size;
+
+	perf_iterate_sb(perf_event_cgroup_output,
+			&cgroup_event,
+			NULL);
+
+	kfree(pathname);
+}
+
+#endif
+
 /*
  * mmap tracking
  */
@@ -10607,6 +10709,8 @@ static void account_event(struct perf_event *event)
 		atomic_inc(&nr_comm_events);
 	if (event->attr.namespaces)
 		atomic_inc(&nr_namespaces_events);
+	if (event->attr.cgroup)
+		atomic_inc(&nr_cgroup_events);
 	if (event->attr.task)
 		atomic_inc(&nr_task_events);
 	if (event->attr.freq)
@@ -12581,6 +12685,12 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
 	kfree(jc);
 }
 
+static int perf_cgroup_css_online(struct cgroup_subsys_state *css)
+{
+	perf_event_cgroup(css->cgroup);
+	return 0;
+}
+
 static int __perf_cgroup_move(void *info)
 {
 	struct task_struct *task = info;
@@ -12602,6 +12712,7 @@ static void perf_cgroup_attach(struct cgroup_taskset *tset)
 struct cgroup_subsys perf_event_cgrp_subsys = {
 	.css_alloc	= perf_cgroup_css_alloc,
 	.css_free	= perf_cgroup_css_free,
+	.css_online	= perf_cgroup_css_online,
 	.attach		= perf_cgroup_attach,
 	/*
 	 * Implicitly enable on dfl hierarchy so that perf events can
-- 
2.24.1.735.g03f4e72817-goog


  reply	other threads:[~2019-12-23  6:08 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-23  6:07 [PATCHSET 0/9] perf: Improve cgroup profiling (v3) Namhyung Kim
2019-12-23  6:07 ` Namhyung Kim [this message]
2019-12-27  4:07   ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event kbuild test robot
2019-12-27  4:07   ` [RFC PATCH] perf/core: perf_event_cgroup() can be static kbuild test robot
2020-01-07 12:51   ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event Peter Zijlstra
2020-01-07 13:46     ` Namhyung Kim
2019-12-23  6:07 ` [PATCH 2/9] perf/core: Add PERF_SAMPLE_CGROUP feature Namhyung Kim
2019-12-23  6:07 ` [PATCH 3/9] perf tools: Basic support for CGROUP event Namhyung Kim
2019-12-23  6:07 ` [PATCH 4/9] perf tools: Maintain cgroup hierarchy Namhyung Kim
2019-12-23  6:07 ` [PATCH 5/9] perf report: Add 'cgroup' sort key Namhyung Kim
2019-12-23  6:07 ` [PATCH 6/9] perf record: Support synthesizing cgroup events Namhyung Kim
2019-12-23  6:07 ` [PATCH 7/9] perf record: Add --all-cgroups option Namhyung Kim
2019-12-23  6:07 ` [PATCH 8/9] perf top: " Namhyung Kim
2019-12-23  6:07 ` [PATCH 9/9] perf script: Add --show-cgroup-events option Namhyung Kim
2019-12-23 17:35 ` [PATCHSET 0/9] perf: Improve cgroup profiling (v3) Vince Weaver
2019-12-24  0:40   ` Namhyung Kim
2019-12-26 12:46     ` Arnaldo Carvalho de Melo
2019-12-27 18:31       ` Vince Weaver
  -- strict thread matches above, loose matches on Subject: below --
2020-03-25 12:45 [PATCHSET 0/9] perf: Improve cgroup profiling (v6) Namhyung Kim
2020-03-25 12:45 ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event Namhyung Kim
2020-01-07 13:34 [PATCHSET 0/9] perf: Improve cgroup profiling (v4) Namhyung Kim
2020-01-07 13:34 ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event Namhyung Kim
2019-12-20  4:32 [PATCHSET 0/9] perf: Improve cgroup profiling (v2) Namhyung Kim
2019-12-20  4:32 ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event Namhyung Kim
2019-12-20  9:33   ` Peter Zijlstra
2019-12-20 11:28     ` Namhyung Kim
2019-12-20 11:50     ` Namhyung Kim
2019-12-20 15:16     ` Tejun Heo
2019-12-21  8:49       ` Namhyung Kim
2019-08-28  7:31 [PATCHSET 0/9] perf: Improve cgroup profiling (v1) Namhyung Kim
2019-08-28  7:31 ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event Namhyung Kim
2019-08-28  9:44   ` Peter Zijlstra
2019-08-28  9:44   ` Peter Zijlstra
2019-08-28 13:13     ` Arnaldo Carvalho de Melo
2019-08-30  3:46     ` Namhyung Kim
2019-08-30  7:34       ` Peter Zijlstra
2019-08-30 22:49         ` Namhyung Kim
2019-08-30 23:52           ` Stephane Eranian
2019-08-28 14:48   ` Tejun Heo
2019-08-30  3:56     ` Namhyung Kim

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191223060759.841176-2-namhyung@kernel.org \
    --to=namhyung@kernel.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=eranian@google.com \
    --cc=hannes@cmpxchg.org \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=lizefan@huawei.com \
    --cc=mark.rutland@arm.com \
    --cc=mingo@kernel.org \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.