From: Namhyung Kim <namhyung@kernel.org>
To: Ingo Molnar <mingo@kernel.org>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>, Jiri Olsa <jolsa@redhat.com>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
Stephane Eranian <eranian@google.com>, Tejun Heo <tj@kernel.org>,
Li Zefan <lizefan@huawei.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Adrian Hunter <adrian.hunter@intel.com>
Subject: [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event
Date: Wed, 28 Aug 2019 16:31:22 +0900 [thread overview]
Message-ID: <20190828073130.83800-2-namhyung@kernel.org> (raw)
In-Reply-To: <20190828073130.83800-1-namhyung@kernel.org>
To support cgroup tracking, add CGROUP event to save a link between
cgroup path and inode number. The attr.cgroup bit was also added to
enable cgroup tracking from userspace.
This event will be generated when a new cgroup becomes active.
Userspace might need to synthesize those events for existing cgroups.
As aux_output change is also going on, I just added the bit here as
well to remove possible conflicts later.
Cc: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
include/uapi/linux/perf_event.h | 15 ++++-
kernel/events/core.c | 112 ++++++++++++++++++++++++++++++++
2 files changed, 126 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 7198ddd0c6b1..cb07c24b715f 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -374,7 +374,9 @@ struct perf_event_attr {
namespaces : 1, /* include namespaces data */
ksymbol : 1, /* include ksymbol events */
bpf_event : 1, /* include bpf events */
- __reserved_1 : 33;
+ aux_output : 1, /* generate AUX records instead of events */
+ cgroup : 1, /* include cgroup events */
+ __reserved_1 : 31;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -999,6 +1001,17 @@ enum perf_event_type {
*/
PERF_RECORD_BPF_EVENT = 18,
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u64 ino;
+ * u64 path_len;
+ * char path[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_CGROUP = 19,
+
PERF_RECORD_MAX, /* non-ABI */
};
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0463c1151bae..d898263db4fc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -386,6 +386,7 @@ static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
static atomic_t nr_ksymbol_events __read_mostly;
static atomic_t nr_bpf_events __read_mostly;
+static atomic_t nr_cgroup_events __read_mostly;
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
@@ -4314,6 +4315,8 @@ static void unaccount_event(struct perf_event *event)
atomic_dec(&nr_comm_events);
if (event->attr.namespaces)
atomic_dec(&nr_namespaces_events);
+ if (event->attr.cgroup)
+ atomic_dec(&nr_cgroup_events);
if (event->attr.task)
atomic_dec(&nr_task_events);
if (event->attr.freq)
@@ -7217,6 +7220,106 @@ void perf_event_namespaces(struct task_struct *task)
NULL);
}
+/*
+ * cgroup tracking
+ */
+#ifdef CONFIG_CGROUPS
+
+struct perf_cgroup_event {
+ char *path;
+ struct {
+ struct perf_event_header header;
+ u64 ino;
+ u64 path_len;
+ char path[];
+ } event_id;
+};
+
+static int perf_event_cgroup_match(struct perf_event *event)
+{
+ return event->attr.cgroup;
+}
+
+static void perf_event_cgroup_output(struct perf_event *event, void *data)
+{
+ struct perf_cgroup_event *cgroup_event = data;
+ struct perf_output_handle handle;
+ struct perf_sample_data sample;
+ u16 header_size = cgroup_event->event_id.header.size;
+ int ret;
+
+ if (!perf_event_cgroup_match(event))
+ return;
+
+ perf_event_header__init_id(&cgroup_event->event_id.header,
+ &sample, event);
+ ret = perf_output_begin(&handle, event,
+ cgroup_event->event_id.header.size);
+ if (ret)
+ goto out;
+
+ perf_output_put(&handle, cgroup_event->event_id);
+ __output_copy(&handle, cgroup_event->path,
+ cgroup_event->event_id.path_len);
+
+ perf_event__output_id_sample(event, &handle, &sample);
+
+ perf_output_end(&handle);
+out:
+ cgroup_event->event_id.header.size = header_size;
+}
+
+void perf_event_cgroup(struct cgroup *cgrp)
+{
+ struct perf_cgroup_event cgroup_event;
+ char path_enomem[16] = "//enomem";
+ char *pathname;
+ size_t size;
+
+ if (!atomic_read(&nr_cgroup_events))
+ return;
+
+ cgroup_event = (struct perf_cgroup_event){
+ .event_id = {
+ .header = {
+ .type = PERF_RECORD_CGROUP,
+ .misc = 0,
+ .size = sizeof(cgroup_event.event_id),
+ },
+ .ino = cgrp->kn->id.ino,
+ },
+ };
+
+ pathname = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (pathname == NULL) {
+ cgroup_event.path = path_enomem;
+ } else {
+ /* just to be sure to have enough space for alignment */
+ cgroup_path(cgrp, pathname, PATH_MAX - sizeof(u64));
+ cgroup_event.path = pathname;
+ }
+
+ /*
+ * Since our buffer works in 8 byte units we need to align our string
+ * size to a multiple of 8. However, we must guarantee the tail end is
+ * zero'd out to avoid leaking random bits to userspace.
+ */
+ size = strlen(cgroup_event.path) + 1;
+ while (!IS_ALIGNED(size, sizeof(u64)))
+ cgroup_event.path[size++] = '\0';
+
+ cgroup_event.event_id.header.size += size;
+ cgroup_event.event_id.path_len = size;
+
+ perf_iterate_sb(perf_event_cgroup_output,
+ &cgroup_event,
+ NULL);
+
+ kfree(pathname);
+}
+
+#endif
+
/*
* mmap tracking
*/
@@ -10232,6 +10335,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_comm_events);
if (event->attr.namespaces)
atomic_inc(&nr_namespaces_events);
+ if (event->attr.cgroup)
+ atomic_inc(&nr_cgroup_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
if (event->attr.freq)
@@ -12186,6 +12291,12 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
kfree(jc);
}
+static int perf_cgroup_css_online(struct cgroup_subsys_state *css)
+{
+ perf_event_cgroup(css->cgroup);
+ return 0;
+}
+
static int __perf_cgroup_move(void *info)
{
struct task_struct *task = info;
@@ -12207,6 +12318,7 @@ static void perf_cgroup_attach(struct cgroup_taskset *tset)
struct cgroup_subsys perf_event_cgrp_subsys = {
.css_alloc = perf_cgroup_css_alloc,
.css_free = perf_cgroup_css_free,
+ .css_online = perf_cgroup_css_online,
.attach = perf_cgroup_attach,
/*
* Implicitly enable on dfl hierarchy so that perf events can
--
2.23.0.187.g17f5b7556c-goog
next prev parent reply other threads:[~2019-08-28 7:31 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-08-28 7:31 [PATCHSET 0/9] perf: Improve cgroup profiling (v1) Namhyung Kim
2019-08-28 7:31 ` Namhyung Kim [this message]
2019-08-28 9:44 ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event Peter Zijlstra
2019-08-28 9:44 ` Peter Zijlstra
2019-08-28 13:13 ` Arnaldo Carvalho de Melo
2019-08-30 3:46 ` Namhyung Kim
2019-08-30 7:34 ` Peter Zijlstra
2019-08-30 22:49 ` Namhyung Kim
2019-08-30 23:52 ` Stephane Eranian
2019-08-28 14:48 ` Tejun Heo
2019-08-30 3:56 ` Namhyung Kim
2019-08-28 7:31 ` [PATCH 2/9] perf/core: Add PERF_SAMPLE_CGROUP feature Namhyung Kim
2019-08-28 14:49 ` Tejun Heo
2019-08-31 3:03 ` Namhyung Kim
2019-08-31 4:58 ` Tejun Heo
2019-09-03 2:13 ` Namhyung Kim
2019-09-05 16:56 ` Tejun Heo
2019-09-08 13:28 ` Namhyung Kim
2019-09-14 14:02 ` Song Liu
2019-09-16 15:23 ` Tejun Heo
2019-09-19 6:42 ` Song Liu
2019-09-20 8:47 ` Namhyung Kim
2019-09-20 16:13 ` Song Liu
2019-09-20 21:04 ` Tejun Heo
2019-10-02 6:28 ` Namhyung Kim
2019-10-07 14:16 ` Tejun Heo
2019-08-28 7:31 ` [PATCH 3/9] perf tools: Basic support for CGROUP event Namhyung Kim
2019-08-30 12:55 ` Arnaldo Carvalho de Melo
2019-08-30 22:51 ` Namhyung Kim
2019-08-28 7:31 ` [PATCH 4/9] perf tools: Maintain cgroup hierarchy Namhyung Kim
2019-08-28 7:31 ` [PATCH 5/9] perf report: Add 'cgroup' sort key Namhyung Kim
2019-08-28 7:31 ` [PATCH 6/9] perf record: Support synthesizing cgroup events Namhyung Kim
2019-08-28 7:31 ` [PATCH 7/9] perf record: Add --all-cgroups option Namhyung Kim
2019-08-28 7:31 ` [PATCH 8/9] perf top: " Namhyung Kim
2019-08-28 7:31 ` [PATCH 9/9] perf script: Add --show-cgroup-events option Namhyung Kim
2019-12-20 4:32 [PATCHSET 0/9] perf: Improve cgroup profiling (v2) Namhyung Kim
2019-12-20 4:32 ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event Namhyung Kim
2019-12-20 9:33 ` Peter Zijlstra
2019-12-20 11:28 ` Namhyung Kim
2019-12-20 11:50 ` Namhyung Kim
2019-12-20 15:16 ` Tejun Heo
2019-12-21 8:49 ` Namhyung Kim
2019-12-23 6:07 [PATCHSET 0/9] perf: Improve cgroup profiling (v3) Namhyung Kim
2019-12-23 6:07 ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event Namhyung Kim
2020-01-07 12:51 ` Peter Zijlstra
2020-01-07 13:46 ` Namhyung Kim
2020-01-07 13:34 [PATCHSET 0/9] perf: Improve cgroup profiling (v4) Namhyung Kim
2020-01-07 13:34 ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event Namhyung Kim
2020-03-25 12:45 [PATCHSET 0/9] perf: Improve cgroup profiling (v6) Namhyung Kim
2020-03-25 12:45 ` [PATCH 1/9] perf/core: Add PERF_RECORD_CGROUP event Namhyung Kim
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190828073130.83800-2-namhyung@kernel.org \
--to=namhyung@kernel.org \
--cc=a.p.zijlstra@chello.nl \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=eranian@google.com \
--cc=hannes@cmpxchg.org \
--cc=jolsa@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=lizefan@huawei.com \
--cc=mingo@kernel.org \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).