linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Namhyung Kim <namhyung@kernel.org>
To: Ingo Molnar <mingo@kernel.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>, Jiri Olsa <jolsa@redhat.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Stephane Eranian <eranian@google.com>, Tejun Heo <tj@kernel.org>,
	Li Zefan <lizefan@huawei.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Adrian Hunter <adrian.hunter@intel.com>
Subject: [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event
Date: Wed, 28 Aug 2019 16:31:22 +0900	[thread overview]
Message-ID: <20190828073130.83800-2-namhyung@kernel.org> (raw)
In-Reply-To: <20190828073130.83800-1-namhyung@kernel.org>

To support cgroup tracking, add CGROUP event to save a link between
cgroup path and inode number.  The attr.cgroup bit was also added to
enable cgroup tracking from userspace.

This event will be generated when a new cgroup becomes active.
Userspace might need to synthesize those events for existing cgroups.

As aux_output change is also going on, I just added the bit here as
well to remove possible conflicts later.

Cc: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 include/uapi/linux/perf_event.h |  15 ++++-
 kernel/events/core.c            | 112 ++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 7198ddd0c6b1..cb07c24b715f 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -374,7 +374,9 @@ struct perf_event_attr {
 				namespaces     :  1, /* include namespaces data */
 				ksymbol        :  1, /* include ksymbol events */
 				bpf_event      :  1, /* include bpf events */
-				__reserved_1   : 33;
+				aux_output     :  1, /* generate AUX records instead of events */
+				cgroup         :  1, /* include cgroup events */
+				__reserved_1   : 31;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -999,6 +1001,17 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_BPF_EVENT			= 18,
 
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				ino;
+	 *	u64				path_len;
+	 *	char				path[];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_CGROUP			= 19,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0463c1151bae..d898263db4fc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -386,6 +386,7 @@ static atomic_t nr_freq_events __read_mostly;
 static atomic_t nr_switch_events __read_mostly;
 static atomic_t nr_ksymbol_events __read_mostly;
 static atomic_t nr_bpf_events __read_mostly;
+static atomic_t nr_cgroup_events __read_mostly;
 
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
@@ -4314,6 +4315,8 @@ static void unaccount_event(struct perf_event *event)
 		atomic_dec(&nr_comm_events);
 	if (event->attr.namespaces)
 		atomic_dec(&nr_namespaces_events);
+	if (event->attr.cgroup)
+		atomic_dec(&nr_cgroup_events);
 	if (event->attr.task)
 		atomic_dec(&nr_task_events);
 	if (event->attr.freq)
@@ -7217,6 +7220,106 @@ void perf_event_namespaces(struct task_struct *task)
 			NULL);
 }
 
+/*
+ * cgroup tracking
+ */
+#ifdef CONFIG_CGROUPS
+
+struct perf_cgroup_event {
+	char				*path;
+	struct {
+		struct perf_event_header	header;
+		u64				ino;
+		u64				path_len;
+		char				path[];
+	} event_id;
+};
+
+static int perf_event_cgroup_match(struct perf_event *event)
+{
+	return event->attr.cgroup;
+}
+
+static void perf_event_cgroup_output(struct perf_event *event, void *data)
+{
+	struct perf_cgroup_event *cgroup_event = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	u16 header_size = cgroup_event->event_id.header.size;
+	int ret;
+
+	if (!perf_event_cgroup_match(event))
+		return;
+
+	perf_event_header__init_id(&cgroup_event->event_id.header,
+				   &sample, event);
+	ret = perf_output_begin(&handle, event,
+				cgroup_event->event_id.header.size);
+	if (ret)
+		goto out;
+
+	perf_output_put(&handle, cgroup_event->event_id);
+	__output_copy(&handle, cgroup_event->path,
+		      cgroup_event->event_id.path_len);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+out:
+	cgroup_event->event_id.header.size = header_size;
+}
+
+void perf_event_cgroup(struct cgroup *cgrp)
+{
+	struct perf_cgroup_event cgroup_event;
+	char path_enomem[16] = "//enomem";
+	char *pathname;
+	size_t size;
+
+	if (!atomic_read(&nr_cgroup_events))
+		return;
+
+	cgroup_event = (struct perf_cgroup_event){
+		.event_id  = {
+			.header = {
+				.type = PERF_RECORD_CGROUP,
+				.misc = 0,
+				.size = sizeof(cgroup_event.event_id),
+			},
+			.ino = cgrp->kn->id.ino,
+		},
+	};
+
+	pathname = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (pathname == NULL) {
+		cgroup_event.path = path_enomem;
+	} else {
+		/* just to be sure to have enough space for alignment */
+		cgroup_path(cgrp, pathname, PATH_MAX - sizeof(u64));
+		cgroup_event.path = pathname;
+	}
+
+	/*
+	 * Since our buffer works in 8 byte units we need to align our string
+	 * size to a multiple of 8. However, we must guarantee the tail end is
+	 * zero'd out to avoid leaking random bits to userspace.
+	 */
+	size = strlen(cgroup_event.path) + 1;
+	while (!IS_ALIGNED(size, sizeof(u64)))
+		cgroup_event.path[size++] = '\0';
+
+	cgroup_event.event_id.header.size += size;
+	cgroup_event.event_id.path_len = size;
+
+	perf_iterate_sb(perf_event_cgroup_output,
+			&cgroup_event,
+			NULL);
+
+	kfree(pathname);
+}
+
+#endif
+
 /*
  * mmap tracking
  */
@@ -10232,6 +10335,8 @@ static void account_event(struct perf_event *event)
 		atomic_inc(&nr_comm_events);
 	if (event->attr.namespaces)
 		atomic_inc(&nr_namespaces_events);
+	if (event->attr.cgroup)
+		atomic_inc(&nr_cgroup_events);
 	if (event->attr.task)
 		atomic_inc(&nr_task_events);
 	if (event->attr.freq)
@@ -12186,6 +12291,12 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
 	kfree(jc);
 }
 
+static int perf_cgroup_css_online(struct cgroup_subsys_state *css)
+{
+	perf_event_cgroup(css->cgroup);
+	return 0;
+}
+
 static int __perf_cgroup_move(void *info)
 {
 	struct task_struct *task = info;
@@ -12207,6 +12318,7 @@ static void perf_cgroup_attach(struct cgroup_taskset *tset)
 struct cgroup_subsys perf_event_cgrp_subsys = {
 	.css_alloc	= perf_cgroup_css_alloc,
 	.css_free	= perf_cgroup_css_free,
+	.css_online	= perf_cgroup_css_online,
 	.attach		= perf_cgroup_attach,
 	/*
 	 * Implicitly enable on dfl hierarchy so that perf events can
-- 
2.23.0.187.g17f5b7556c-goog


  reply	other threads:[~2019-08-28  7:31 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-28  7:31 [PATCHSET 0/9] perf: Improve cgroup profiling (v1) Namhyung Kim
2019-08-28  7:31 ` Namhyung Kim [this message]
2019-08-28  9:44   ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event Peter Zijlstra
2019-08-28  9:44   ` Peter Zijlstra
2019-08-28 13:13     ` Arnaldo Carvalho de Melo
2019-08-30  3:46     ` Namhyung Kim
2019-08-30  7:34       ` Peter Zijlstra
2019-08-30 22:49         ` Namhyung Kim
2019-08-30 23:52           ` Stephane Eranian
2019-08-28 14:48   ` Tejun Heo
2019-08-30  3:56     ` Namhyung Kim
2019-08-28  7:31 ` [PATCH 2/9] perf/core: Add PERF_SAMPLE_CGROUP feature Namhyung Kim
2019-08-28 14:49   ` Tejun Heo
2019-08-31  3:03     ` Namhyung Kim
2019-08-31  4:58       ` Tejun Heo
2019-09-03  2:13         ` Namhyung Kim
2019-09-05 16:56           ` Tejun Heo
2019-09-08 13:28             ` Namhyung Kim
2019-09-14 14:02         ` Song Liu
2019-09-16 15:23           ` Tejun Heo
2019-09-19  6:42             ` Song Liu
2019-09-20  8:47               ` Namhyung Kim
2019-09-20 16:13                 ` Song Liu
2019-09-20 21:04                 ` Tejun Heo
2019-10-02  6:28                   ` Namhyung Kim
2019-10-07 14:16                     ` Tejun Heo
2019-08-28  7:31 ` [PATCH 3/9] perf tools: Basic support for CGROUP event Namhyung Kim
2019-08-30 12:55   ` Arnaldo Carvalho de Melo
2019-08-30 22:51     ` Namhyung Kim
2019-08-28  7:31 ` [PATCH 4/9] perf tools: Maintain cgroup hierarchy Namhyung Kim
2019-08-28  7:31 ` [PATCH 5/9] perf report: Add 'cgroup' sort key Namhyung Kim
2019-08-28  7:31 ` [PATCH 6/9] perf record: Support synthesizing cgroup events Namhyung Kim
2019-08-28  7:31 ` [PATCH 7/9] perf record: Add --all-cgroups option Namhyung Kim
2019-08-28  7:31 ` [PATCH 8/9] perf top: " Namhyung Kim
2019-08-28  7:31 ` [PATCH 9/9] perf script: Add --show-cgroup-events option Namhyung Kim
2019-12-20  4:32 [PATCHSET 0/9] perf: Improve cgroup profiling (v2) Namhyung Kim
2019-12-20  4:32 ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event Namhyung Kim
2019-12-20  9:33   ` Peter Zijlstra
2019-12-20 11:28     ` Namhyung Kim
2019-12-20 11:50     ` Namhyung Kim
2019-12-20 15:16     ` Tejun Heo
2019-12-21  8:49       ` Namhyung Kim
2019-12-23  6:07 [PATCHSET 0/9] perf: Improve cgroup profiling (v3) Namhyung Kim
2019-12-23  6:07 ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event Namhyung Kim
2020-01-07 12:51   ` Peter Zijlstra
2020-01-07 13:46     ` Namhyung Kim
2020-01-07 13:34 [PATCHSET 0/9] perf: Improve cgroup profiling (v4) Namhyung Kim
2020-01-07 13:34 ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event Namhyung Kim
2020-03-25 12:45 [PATCHSET 0/9] perf: Improve cgroup profiling (v6) Namhyung Kim
2020-03-25 12:45 ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event Namhyung Kim

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190828073130.83800-2-namhyung@kernel.org \
    --to=namhyung@kernel.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=eranian@google.com \
    --cc=hannes@cmpxchg.org \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizefan@huawei.com \
    --cc=mingo@kernel.org \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).