From: Namhyung Kim <namhyung@kernel.org>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Jiri Olsa <jolsa@redhat.com>, Mark Rutland <mark.rutland@arm.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	LKML <linux-kernel@vger.kernel.org>,
	Stephane Eranian <eranian@google.com>,
	Andi Kleen <ak@linux.intel.com>, Ian Rogers <irogers@google.com>,
	Song Liu <songliubraving@fb.com>
Subject: [PATCH v3 2/2] perf/core: Support reading group events with shared cgroups
Date: Tue, 13 Apr 2021 08:53:37 -0700
Message-ID: <20210413155337.644993-3-namhyung@kernel.org>
In-Reply-To: <20210413155337.644993-1-namhyung@kernel.org>

This enables reading an event group's counter values together with the
PERF_EVENT_IOC_READ_CGROUP command, like we do in the regular read().
Users should pass a correct buffer size for the read, which covers the
leading total-buffer-size and cgroup-id fields as well as the read data
(i.e. buf = bufsize + cgroup_id + read_buffer).
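
For example, with PERF_FORMAT_GROUP a whole group could be read for one
cgroup as in the sketch below.  This is only an illustration, not part
of the patch: "group_fd" is assumed to be a group leader opened with
perf_event_open(), and "cgrp_id" a cgroup id attached beforehand with
the PERF_EVENT_IOC_ATTACH_CGROUP command from the previous patch.

    uint64_t buf[2 + 16] = { 0, };  /* header + room for the read data */

    buf[0] = sizeof(buf);           /* total buffer size */
    buf[1] = cgrp_id;               /* cgroup id to read */

    if (ioctl(group_fd, PERF_EVENT_IOC_READ_CGROUP, buf) < 0)
            err(1, "PERF_EVENT_IOC_READ_CGROUP");

    /* read data starts at buf[2]: nr (= 1 + nr_siblings), optional
     * time_enabled/time_running, then each member's value (and its
     * id when PERF_FORMAT_ID is set) */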

Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 kernel/events/core.c | 120 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 117 insertions(+), 3 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index bcf51c0b7855..7440857d680e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2232,13 +2232,24 @@ static void perf_add_cgrp_node_list(struct perf_event *event,
 {
 	struct list_head *cgrp_ctx_list = this_cpu_ptr(&cgroup_ctx_list);
 	struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
+	struct perf_event *sibling;
 	bool is_first;
 
 	lockdep_assert_irqs_disabled();
 	lockdep_assert_held(&ctx->lock);
 
+	/* only group leader can be added directly */
+	if (event->group_leader != event)
+		return;
+
+	if (!event_has_cgroup_node(event))
+		return;
+
 	is_first = list_empty(&ctx->cgrp_node_list);
+
 	list_add_tail(&event->cgrp_node_entry, &ctx->cgrp_node_list);
+	for_each_sibling_event(sibling, event)
+		list_add_tail(&sibling->cgrp_node_entry, &ctx->cgrp_node_list);
 
 	if (is_first)
 		list_add_tail(&ctx->cgrp_ctx_entry, cgrp_ctx_list);
@@ -2250,15 +2261,25 @@ static void perf_del_cgrp_node_list(struct perf_event *event,
 				    struct perf_event_context *ctx)
 {
 	struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
+	struct perf_event *sibling;
 
 	lockdep_assert_irqs_disabled();
 	lockdep_assert_held(&ctx->lock);
 
+	/* only group leader can be deleted directly */
+	if (event->group_leader != event)
+		return;
+
+	if (!event_has_cgroup_node(event))
+		return;
+
 	update_cgroup_node(event, cgrp->css.cgroup);
 	/* to refresh delta when it's enabled */
 	event->cgrp_node_count = 0;
 
 	list_del(&event->cgrp_node_entry);
+	for_each_sibling_event(sibling, event)
+		list_del(&sibling->cgrp_node_entry);
 
 	if (list_empty(&ctx->cgrp_node_list))
 		list_del(&ctx->cgrp_ctx_entry);
@@ -2333,7 +2354,7 @@ static int perf_event_attach_cgroup_node(struct perf_event *event, u64 nr_cgrps,
 
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
 
-	if (is_first && enabled)
+	if (is_first && enabled && event->group_leader == event)
 		event_function_call(event, perf_attach_cgroup_node, NULL);
 
 	return 0;
@@ -2370,8 +2391,8 @@ static void __perf_read_cgroup_node(struct perf_event *event)
 	}
 }
 
-static int perf_event_read_cgroup_node(struct perf_event *event, u64 read_size,
-				       u64 cgrp_id, char __user *buf)
+static int perf_event_read_cgrp_node_one(struct perf_event *event, u64 cgrp_id,
+					 char __user *buf)
 {
 	struct perf_cgroup_node *cgrp;
 	struct perf_event_context *ctx = event->ctx;
@@ -2406,6 +2427,92 @@ static int perf_event_read_cgroup_node(struct perf_event *event, u64 read_size,
 
 	return n * sizeof(u64);
 }
+
+static int perf_event_read_cgrp_node_sibling(struct perf_event *event,
+					     u64 read_format, u64 cgrp_id,
+					     u64 *values)
+{
+	struct perf_cgroup_node *cgrp;
+	int n = 0;
+
+	cgrp = find_cgroup_node(event, cgrp_id);
+	if (cgrp == NULL)
+		return (read_format & PERF_FORMAT_ID) ? 2 : 1;
+
+	values[n++] = cgrp->count;
+	if (read_format & PERF_FORMAT_ID)
+		values[n++] = primary_event_id(event);
+	return n;
+}
+
+static int perf_event_read_cgrp_node_group(struct perf_event *event, u64 cgrp_id,
+					   char __user *buf)
+{
+	struct perf_cgroup_node *cgrp;
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_event *sibling;
+	u64 read_format = event->attr.read_format;
+	unsigned long flags;
+	u64 *values;
+	int n = 1;
+	int ret;
+
+	values = kzalloc(event->read_size, GFP_KERNEL);
+	if (!values)
+		return -ENOMEM;
+
+	values[0] = 1 + event->nr_siblings;
+
+	/* update event count and times (possibly run on other cpu) */
+	(void)perf_event_read(event, true);
+
+	raw_spin_lock_irqsave(&ctx->lock, flags);
+
+	cgrp = find_cgroup_node(event, cgrp_id);
+	if (cgrp == NULL) {
+		raw_spin_unlock_irqrestore(&ctx->lock, flags);
+		kfree(values);
+		return -ENOENT;
+	}
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		values[n++] = cgrp->time_enabled;
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		values[n++] = cgrp->time_running;
+
+	values[n++] = cgrp->count;
+	if (read_format & PERF_FORMAT_ID)
+		values[n++] = primary_event_id(event);
+
+	for_each_sibling_event(sibling, event) {
+		n += perf_event_read_cgrp_node_sibling(sibling, read_format,
+						       cgrp_id, &values[n]);
+	}
+
+	raw_spin_unlock_irqrestore(&ctx->lock, flags);
+
+	ret = copy_to_user(buf, values, n * sizeof(u64));
+	kfree(values);
+	if (ret)
+		return -EFAULT;
+
+	return n * sizeof(u64);
+}
+
+static int perf_event_read_cgroup_node(struct perf_event *event, u64 read_size,
+				       u64 cgrp_id, char __user *buf)
+{
+	u64 read_format = event->attr.read_format;
+
+	/* buf = bufsize + cgroup_id + read_buffer */
+	if (read_size < 2 * sizeof(u64) + event->read_size)
+		return -EINVAL;
+
+	if (read_format & PERF_FORMAT_GROUP)
+		return perf_event_read_cgrp_node_group(event, cgrp_id, buf);
+
+	return perf_event_read_cgrp_node_one(event, cgrp_id, buf);
+}
 #else  /* !CONFIG_CGROUP_PERF */
 static inline bool event_can_attach_cgroup(struct perf_event *event)
 {
@@ -2512,6 +2619,7 @@ static void perf_group_detach(struct perf_event *event)
 			if (sibling->state == PERF_EVENT_STATE_ACTIVE)
 				list_add_tail(&sibling->active_list, get_event_list(sibling));
 		}
+		perf_add_cgrp_node_list(sibling, event->ctx);
 
 		WARN_ON_ONCE(sibling->ctx != event->ctx);
 	}
@@ -2655,6 +2763,9 @@ __perf_remove_from_context(struct perf_event *event,
 		perf_group_detach(event);
 	list_del_event(event, ctx);
 
+	if (event->state > PERF_EVENT_STATE_OFF)
+		perf_del_cgrp_node_list(event, ctx);
+
 	if (!ctx->nr_events && ctx->is_active) {
 		ctx->is_active = 0;
 		ctx->rotate_necessary = 0;
@@ -3113,6 +3224,9 @@ static int  __perf_install_in_context(void *info)
 		reprogram = cgroup_is_descendant(cgrp->css.cgroup,
 					event->cgrp->css.cgroup);
 	}
+
+	if (event->state > PERF_EVENT_STATE_OFF)
+		perf_add_cgrp_node_list(event, ctx);
 #endif
 
 	if (reprogram) {
-- 
2.31.1.295.g9ea45b61b8-goog


