linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Vikas Shivappa <vikas.shivappa@linux.intel.com>
To: vikas.shivappa@intel.com, vikas.shivappa@linux.intel.com
Cc: linux-kernel@vger.kernel.org, x86@kernel.org, tglx@linutronix.de,
	peterz@infradead.org, ravi.v.shankar@intel.com,
	tony.luck@intel.com, fenghua.yu@intel.com, andi.kleen@intel.com,
	davidcc@google.com, eranian@google.com, hpa@zytor.com
Subject: [PATCH 09/14] x86/cqm: Add Continuous cgroup monitoring
Date: Fri, 16 Dec 2016 15:13:03 -0800	[thread overview]
Message-ID: <1481929988-31569-10-git-send-email-vikas.shivappa@linux.intel.com> (raw)
In-Reply-To: <1481929988-31569-1-git-send-email-vikas.shivappa@linux.intel.com>

This patch adds support for continuous cgroup monitoring, which makes it
possible to start monitoring a cgroup by toggling the cont_mon field in
the cgroup, without any perf overhead.
The cgroup is monitored from the time this field is set, and the user
can fetch the data from perf whenever the data is needed.
This avoids perf overhead for the entire time that the cgroup is being
monitored: if one has to monitor a cgroup for its lifetime, perf does
not need to run the whole time.

A new file, cont_mon, is introduced in the cgroup. Once this is enabled,
a new RMID is assigned to the cgroup. If an event is later created to
monitor this cgroup, the event just reuses the same RMID. At switch_to
time, we add a check to see if there is cont_monitoring. During read,
data is fetched by reading the counters in the same way as it is done
for other cgroups.

Tests: One should be able to monitor a cgroup continuously, without
running perf, by toggling the new cont_mon file in the cgroup.

Patch is based on David Carrillo-Cisneros <davidcc@google.com> patches
in cqm2 series.

Signed-off-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>
---
 arch/x86/events/intel/cqm.c | 119 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 114 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 8017886..73f566a 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -521,6 +521,7 @@ static int cqm_assign_rmid(struct perf_event *event, u32 *rmid)
 static int intel_cqm_setup_event(struct perf_event *event,
 				  struct perf_event **group)
 {
+	struct cgrp_cqm_info *cqm_info;
 	struct perf_event *iter;
 	u32 *rmid, sizet;
 
@@ -537,6 +538,18 @@ static int intel_cqm_setup_event(struct perf_event *event,
 			return 0;
 		}
 	}
+#ifdef CONFIG_CGROUP_PERF
+	/*
+	 * For continously monitored cgroups, *rmid is allocated already.
+	 */
+	if (event->cgrp) {
+		cqm_info = cgrp_to_cqm_info(event->cgrp);
+		if (cqm_info->cont_mon) {
+			event->hw.cqm_rmid = cqm_info->rmid;
+			return 0;
+		}
+	}
+#endif
 
 	/*
 	 * RMIDs are allocated in LAZY mode by default only when
@@ -547,6 +560,8 @@ static int intel_cqm_setup_event(struct perf_event *event,
 	if (!event->hw.cqm_rmid)
 		return -ENOMEM;
 
+	cqm_assign_rmid(event, event->hw.cqm_rmid);
+
 	return 0;
 }
 
@@ -843,18 +858,23 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
 	return 0;
 }
 
+static inline bool is_cont_mon_event(struct perf_event *event);
+
 static inline void
 	cqm_event_free_rmid(struct perf_event *event)
 {
 	u32 *rmid = event->hw.cqm_rmid;
 	int d;
 
-	for (d = 0; d < cqm_socket_max; d++) {
-		if (__rmid_valid(rmid[d]))
-			__put_rmid(rmid[d], d);
+	if (!is_cont_mon_event(event)) {
+
+		for (d = 0; d < cqm_socket_max; d++) {
+			if (__rmid_valid(rmid[d]))
+				__put_rmid(rmid[d], d);
+		}
+		cqm_assign_rmid(event, NULL);
+		kfree(event->hw.cqm_rmid);
 	}
-	kfree(event->hw.cqm_rmid);
-	cqm_assign_rmid(event, NULL);
 	list_del(&event->hw.cqm_groups_entry);
 }
 
@@ -1122,6 +1142,11 @@ static int intel_cqm_event_init(struct perf_event *event)
 };
 
 #ifdef CONFIG_CGROUP_PERF
+static inline bool is_cont_mon_event(struct perf_event *event)
+{
+	return (is_cgroup_event(event) && cgrp_to_cqm_info(event->cgrp)->cont_mon);
+}
+
 int perf_cgroup_arch_css_alloc(struct cgroup_subsys_state *parent_css,
 				      struct cgroup_subsys_state *new_css)
 {
@@ -1230,6 +1255,90 @@ int perf_cgroup_arch_can_attach(struct cgroup_taskset *tset)
 
 	return 0;
 }
+
+/* kernfs guarantees that css doesn't need to be pinned. */
+static u64 cqm_cont_monitoring_read_u64(struct cgroup_subsys_state *css,
+					struct cftype *cft)
+{
+	int ret = -1;
+
+	mutex_lock(&cache_mutex);
+	ret = css_to_cqm_info(css)->cont_mon;
+	mutex_unlock(&cache_mutex);
+
+	return ret;
+}
+
+/* kernfs guarantees that css doesn't need to be pinned. */
+static int cqm_cont_monitoring_write_u64(struct cgroup_subsys_state *css,
+					 struct cftype *cft, u64 value)
+{
+	struct cgrp_cqm_info *cqm_info;
+	unsigned long flags;
+	int ret = 0, d;
+
+	if (value > 1)
+		return -1;
+
+	mutex_lock(&cache_mutex);
+
+	/* Root cgroup cannot stop being monitored. */
+	if (!css->parent)
+		goto out;
+
+	cqm_info = css_to_cqm_info(css);
+
+	/*
+	 * Alloc and free rmid when cont monitoring is being set
+	 * and reset.
+	 */
+	if (!cqm_info->cont_mon && value && !cqm_info->rmid) {
+		cqm_info->rmid =
+			kzalloc(sizeof(u32) * cqm_socket_max, GFP_KERNEL);
+		if (!cqm_info->rmid) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		cqm_assign_hier_rmid(css, cqm_info->rmid);
+	}
+
+	if (cqm_info->cont_mon && !value) {
+		u32 *rmid = cqm_info->rmid;
+
+		raw_spin_lock_irqsave(&cache_lock, flags);
+		for (d = 0; d < cqm_socket_max; d++) {
+			if (__rmid_valid(rmid[d]))
+				__put_rmid(rmid[d], d);
+		}
+		raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+		kfree(cqm_info->rmid);
+		cqm_assign_hier_rmid(css, NULL);
+	}
+
+	cqm_info->cont_mon = value;
+out:
+	mutex_unlock(&cache_mutex);
+
+	return ret;
+}
+
+struct cftype perf_event_cgrp_arch_subsys_cftypes[] = {
+	{
+		.name = "cqm_cont_monitoring",
+		.read_u64 = cqm_cont_monitoring_read_u64,
+		.write_u64 = cqm_cont_monitoring_write_u64,
+	},
+
+	{}	/* terminate */
+};
+#else
+
+static inline bool is_cont_mon_event(struct perf_event *event)
+{
+	return false;
+}
 #endif
 
 static inline void cqm_pick_event_reader(int cpu)
-- 
1.9.1

  parent reply	other threads:[~2016-12-16 23:15 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-12-16 23:12 [PATCH V4 00/14] Cqm2: Intel Cache Monitoring fixes and enhancements Vikas Shivappa
2016-12-16 23:12 ` [PATCH 01/14] x86/cqm: Intel Resource Monitoring Documentation Vikas Shivappa
2016-12-23 12:32   ` Peter Zijlstra
2016-12-23 19:35     ` Shivappa Vikas
2016-12-23 20:33       ` Peter Zijlstra
2016-12-23 21:41         ` Shivappa Vikas
2016-12-25  1:51         ` Shivappa Vikas
2016-12-27  7:13           ` David Carrillo-Cisneros
2016-12-27 20:00           ` Andi Kleen
2016-12-27 20:21             ` Shivappa Vikas
2016-12-27 21:38               ` David Carrillo-Cisneros
2016-12-27 21:33             ` David Carrillo-Cisneros
2016-12-27 23:10               ` Andi Kleen
2016-12-28  1:23                 ` David Carrillo-Cisneros
2016-12-28 20:03                   ` Shivappa Vikas
2016-12-16 23:12 ` [PATCH 02/14] x86/cqm: Remove cqm recycling/conflict handling Vikas Shivappa
2016-12-16 23:12 ` [PATCH 03/14] x86/rdt: Add rdt common/cqm compile option Vikas Shivappa
2016-12-16 23:12 ` [PATCH 04/14] x86/cqm: Add Per pkg rmid support Vikas Shivappa
2016-12-16 23:12 ` [PATCH 05/14] x86/cqm,perf/core: Cgroup support prepare Vikas Shivappa
2016-12-16 23:13 ` [PATCH 06/14] x86/cqm: Add cgroup hierarchical monitoring support Vikas Shivappa
2016-12-16 23:13 ` [PATCH 07/14] x86/rdt,cqm: Scheduling support update Vikas Shivappa
2016-12-16 23:13 ` [PATCH 08/14] x86/cqm: Add support for monitoring task and cgroup together Vikas Shivappa
2016-12-16 23:13 ` Vikas Shivappa [this message]
2016-12-16 23:13 ` [PATCH 10/14] x86/cqm: Add RMID reuse Vikas Shivappa
2016-12-16 23:13 ` [PATCH 11/14] x86/cqm: Add failure on open and read Vikas Shivappa
2016-12-23 11:58   ` David Carrillo-Cisneros
2016-12-16 23:13 ` [PATCH 12/14] perf/core,x86/cqm: Add read for Cgroup events,per pkg reads Vikas Shivappa
2016-12-16 23:13 ` [PATCH 13/14] perf/stat: fix bug in handling events in error state Vikas Shivappa
2016-12-16 23:13 ` [PATCH 14/14] perf/stat: revamp read error handling, snapshot and per_pkg events Vikas Shivappa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1481929988-31569-10-git-send-email-vikas.shivappa@linux.intel.com \
    --to=vikas.shivappa@linux.intel.com \
    --cc=andi.kleen@intel.com \
    --cc=davidcc@google.com \
    --cc=eranian@google.com \
    --cc=fenghua.yu@intel.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=ravi.v.shankar@intel.com \
    --cc=tglx@linutronix.de \
    --cc=tony.luck@intel.com \
    --cc=vikas.shivappa@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).