linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Vikas Shivappa <vikas.shivappa@linux.intel.com>
To: linux-kernel@vger.kernel.org
Cc: vikas.shivappa@intel.com, x86@kernel.org, hpa@zytor.com,
	tglx@linutronix.de, mingo@kernel.org, peterz@infradead.org,
	matt.fleming@intel.com, will.auld@intel.com,
	linux-rdt@eclists.intel.com, vikas.shivappa@linux.intel.com
Subject: [PATCH 07/10] x86/intel_rdt: Add support for cache bit mask management
Date: Fri, 12 Jun 2015 11:17:14 -0700	[thread overview]
Message-ID: <1434133037-25189-8-git-send-email-vikas.shivappa@linux.intel.com> (raw)
In-Reply-To: <1434133037-25189-1-git-send-email-vikas.shivappa@linux.intel.com>

This change adds a cache_mask file to the RDT cgroup, which represents the
cache bit mask (CBM) for the cgroup. cache_mask is specific to the Cache
Allocation sub-feature of RDT. The tasks in the RDT cgroup get to
fill the L3 cache represented by the cgroup's cache_mask file.

Updates to the CBM are done by writing to IA32_L3_MASK_n.  The RDT
cgroup follows the cgroup hierarchy; mkdir and adding tasks to the cgroup
never fail.  When a child cgroup is created it inherits the CLOSid and
the cache_mask from its parent.  When a user changes the default CBM for
a cgroup, a new CLOSid may be allocated if the cache_mask was not used
before. If the new CBM is one that is already in use, the count for
that CLOSid<->CBM is incremented. Changing 'cache_mask' may fail
with -ENOSPC once the kernel runs out of the CLOSids it can support.

A user can create as many cgroups as desired, but the number of different
CBMs in use at the same time is limited by the maximum number of CLOSids.
The kernel maintains a CLOSid<->CBM mapping which keeps count of the
cgroups using a CLOSid.

Reuse of CLOSids for cgroups with the same bitmask also has the following
advantages:
 - This helps to use the scant CLOSids optimally.
 - This also implies that during context switch, the write to PQR-MSR is
 done only when a task with a different bitmask is scheduled in.

Signed-off-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>
---
 arch/x86/include/asm/intel_rdt.h |   3 +
 arch/x86/kernel/cpu/intel_rdt.c  | 205 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 207 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 2ce3e2c..3ad426c 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -4,6 +4,9 @@
 #ifdef CONFIG_CGROUP_RDT
 
 #include <linux/cgroup.h>
+#define MAX_CBM_LENGTH			32	/* bitmap length used for CBMs */
+#define IA32_L3_CBM_BASE		0xc90	/* MSR holding CLOSid 0's CBM */
+#define CBM_FROM_INDEX(x)		(IA32_L3_CBM_BASE + (x)) /* CBM MSR of CLOSid x */
 
 struct rdt_subsys_info {
 	unsigned long *closmap;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 5ba241e..becb487 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -34,6 +34,13 @@ static struct clos_cbm_map *ccmap;
 static struct rdt_subsys_info rdtss_info;
 static DEFINE_MUTEX(rdt_group_mutex);
 struct intel_rdt rdt_root_group;
+/*
+ * CPUs used to write CBM MSRs, one per socket (see rdt_cpumask_update()).
+ */
+static cpumask_t rdt_cpumask;
+
+#define rdt_for_each_child(pos_css, parent_ir)		\
+	css_for_each_child((pos_css), &(parent_ir)->css)
 
 static inline void closid_get(u32 closid)
 {
@@ -117,13 +124,195 @@ static void intel_rdt_css_free(struct cgroup_subsys_state *css)
 	mutex_unlock(&rdt_group_mutex);
 }
 
+/* seq_show handler for "cache_mask": print the cgroup's CBM as hex. */
+static int intel_cache_alloc_cbm_read(struct seq_file *m, void *v)
+{
+	u32 closid = css_rdt(seq_css(m))->closid;
+
+	seq_printf(m, "%08lx\n", ccmap[closid].cache_mask);
+	return 0;
+}
+
+/* Only the low MAX_CBM_LENGTH bits of @var are examined. Returns false if
+ * @var is 0 or those bits contain more than one run of set bits. */
+static inline bool cbm_is_contiguous(unsigned long var)
+{
+	unsigned long nbits = MAX_CBM_LENGTH;
+	unsigned long run_start, run_end;
+
+	if (var == 0)
+		return false;
+
+	/* [run_start, run_end) is the lowest run of set bits in @var. */
+	run_start = find_next_bit(&var, nbits, 0);
+	run_end = find_next_zero_bit(&var, nbits, run_start);
+
+	return find_next_bit(&var, nbits, run_end) >= nbits;
+}
+
+/*
+ * cbm_validate() - check @cbmvalue against the cgroup hierarchy.
+ * The mask must be non-empty and contiguous, be a subset of the mask of
+ * @ir's parent, and be a superset of the mask of each child of @ir.
+ *
+ * Return: 0 if all checks pass, -EINVAL otherwise.
+ */
+static int cbm_validate(struct intel_rdt *ir, unsigned long cbmvalue)
+{
+	unsigned long *parent_cbm, *child_cbm;
+	struct cgroup_subsys_state *pos;
+	bool child_ok = true;
+
+	if (!cbm_is_contiguous(cbmvalue)) {
+		pr_err("bitmask should have >= 1 bit and be contiguous\n");
+		return -EINVAL;
+	}
+
+	parent_cbm = &ccmap[parent_rdt(ir)->closid].cache_mask;
+	if (!bitmap_subset(&cbmvalue, parent_cbm, MAX_CBM_LENGTH))
+		return -EINVAL;
+
+	rcu_read_lock();
+	rdt_for_each_child(pos, ir) {
+		child_cbm = &ccmap[css_rdt(pos)->closid].cache_mask;
+		child_ok = bitmap_subset(child_cbm, &cbmvalue, MAX_CBM_LENGTH);
+		if (!child_ok)
+			break;
+	}
+	rcu_read_unlock();
+	if (child_ok)
+		return 0;
+
+	pr_err("Children's mask not a subset\n");
+	return -EINVAL;
+}
+
+/* Find the CLOSid whose mask equals @cbm; true (and *closid set) on a hit. */
+static bool cbm_search(unsigned long cbm, u32 *closid)
+{
+	u32 id, nr_closids = boot_cpu_data.x86_rdt_max_closid;
+
+	for (id = 0; id < nr_closids; id++) {
+		if (!bitmap_equal(&cbm, &ccmap[id].cache_mask, MAX_CBM_LENGTH))
+			continue;
+		*closid = id;
+		return true;
+	}
+
+	return false;
+}
+
+/* Emit every CLOSid's cache mask and reference count via pr_debug(). */
+static void closcbm_map_dump(void)
+{
+	u32 id, nr_closids = boot_cpu_data.x86_rdt_max_closid;
+
+	pr_debug("CBMMAP\n");
+	for (id = 0; id < nr_closids; id++)
+		pr_debug("cache_mask: 0x%x,clos_refcnt: %u\n",
+		 (unsigned int)ccmap[id].cache_mask, ccmap[id].clos_refcnt);
+}
+
+/* Per-CPU callback: @info carries the CLOSid by value, not a real pointer. */
+static void cbm_cpu_update(void *info)
+{
+	u32 closid = (u32)(unsigned long)info;
+
+	wrmsrl(CBM_FROM_INDEX(closid), ccmap[closid].cache_mask);
+}
+
+/* cbm_update_all() - write @closid's CBM on every CPU in rdt_cpumask. */
+static inline void cbm_update_all(u32 closid)
+{
+	on_each_cpu_mask(&rdt_cpumask, cbm_cpu_update,
+			 (void *)(unsigned long)closid, 1);
+}
+
+/*
+ * intel_cache_alloc_cbm_write() - "cache_mask" write handler: validate
+ * @cbmvalue, store it in the ccmap and write it to IA32_L3_MASK_n.
+ * Returns 0, -EPERM for the root group, -EINVAL for a bad mask, or the
+ * closid_alloc() error when the CLOSids are exhausted.
+ *
+ * CLOSids are reused for cgroups which have the same bitmask. This uses
+ * the scant CLOSids optimally and means that at context switch the PQR-MSR
+ * is written only when a task with a different bitmask is scheduled in.
+ */
+static int intel_cache_alloc_cbm_write(struct cgroup_subsys_state *css,
+				 struct cftype *cft, u64 cbmvalue)
+{
+	u32 max_cbm = boot_cpu_data.x86_rdt_max_cbm_len;
+	struct intel_rdt *ir = css_rdt(css);
+	int err = 0;
+	u64 max_mask;
+	u32 closid;
+
+	if (ir == &rdt_root_group)
+		return -EPERM;
+
+	/*
+	 * Need global mutex as cbm write may allocate a closid.
+	 */
+	mutex_lock(&rdt_group_mutex);
+
+	max_mask = (1ULL << max_cbm) - 1;
+	if (cbmvalue & ~max_mask) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (cbmvalue == ccmap[ir->closid].cache_mask)
+		goto out;
+
+	err = cbm_validate(ir, cbmvalue);
+	if (err)
+		goto out;
+
+	/*
+	 * Try to get a reference for a different CLOSid and release the
+	 * reference to the current CLOSid.
+	 * Need to put down the reference here and get it back in case we
+	 * run out of closids. Otherwise we run into a problem when
+	 * we could be using the last closid that could have been available.
+	 */
+	closid_put(ir->closid);
+	if (cbm_search(cbmvalue, &closid)) {
+		ir->closid = closid;
+		closid_get(closid);
+	} else {
+		err = closid_alloc(ir);
+		if (err) {
+			/* Out of CLOSids: re-take the dropped reference. */
+			closid_get(ir->closid);
+			goto out;
+		}
+
+		ccmap[ir->closid].cache_mask = cbmvalue;
+		cbm_update_all(ir->closid);
+	}
+	closcbm_map_dump();
+out:
+	mutex_unlock(&rdt_group_mutex);
+
+	return err;
+}
+
+/* Add @cpu unless rdt_cpumask has a CPU in topology_core_cpumask(@cpu). */
+static inline void rdt_cpumask_update(int cpu)
+{
+	const struct cpumask *pkg = topology_core_cpumask(cpu);
+
+	if (!cpumask_intersects(&rdt_cpumask, pkg))	/* no on-stack mask */
+		cpumask_set_cpu(cpu, &rdt_cpumask);
+}
+
 static int __init intel_rdt_late_init(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 	static struct clos_cbm_map *ccm;
 	u32 maxid, max_cbm_len;
+	int err = 0, i;
 	size_t sizeb;
-	int err = 0;
 
 	if (!cpu_has(c, X86_FEATURE_CAT_L3)) {
 		rdt_root_group.css.ss->disabled = 1;
@@ -153,6 +342,9 @@ static int __init intel_rdt_late_init(void)
 	ccm->cache_mask = (1ULL << max_cbm_len) - 1;
 	ccm->clos_refcnt = 1;
 
+	for_each_online_cpu(i)
+		rdt_cpumask_update(i);
+
 	pr_info("Intel cache allocation enabled\n");
 out_err:
 
@@ -161,8 +353,19 @@ out_err:
 
 late_initcall(intel_rdt_late_init);
 
+static struct cftype rdt_files[] = {
+	{
+		.name		= "cache_mask",
+		.seq_show	= intel_cache_alloc_cbm_read,
+		.write_u64	= intel_cache_alloc_cbm_write,
+		.mode		= 0644,	/* only the owner may change the CBM */
+	},
+	{ }	/* terminate */
+};
+
 struct cgroup_subsys intel_rdt_cgrp_subsys = {
 	.css_alloc		= intel_rdt_css_alloc,
 	.css_free		= intel_rdt_css_free,
+	.legacy_cftypes	= rdt_files,
 	.early_init		= 0,
 };
-- 
1.9.1


  parent reply	other threads:[~2015-06-12 18:22 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-12 18:17 [PATCH V9 00/10] New cpumask API and Intel Cache Allocation support Vikas Shivappa
2015-06-12 18:17 ` [PATCH 01/10] cpumask: Introduce cpumask_any_online_but Vikas Shivappa
2015-06-15 12:36   ` Peter Zijlstra
2015-06-15 16:48     ` Vikas Shivappa
2015-06-16  8:20       ` Thomas Gleixner
2015-06-16 19:00         ` Vikas Shivappa
2015-06-12 18:17 ` [PATCH 02/10] x86/intel_cqm: Modify hot cpu notification handling Vikas Shivappa
2015-06-15 12:38   ` Peter Zijlstra
2015-06-15 16:52     ` Vikas Shivappa
2015-06-12 18:17 ` [PATCH 03/10] x86/intel_rapl: Modify hot cpu notification handling for RAPL Vikas Shivappa
2015-06-12 18:17 ` [PATCH 04/10] x86/intel_rdt: Cache Allocation documentation and cgroup usage guide Vikas Shivappa
2015-06-12 18:17 ` [PATCH 05/10] x86/intel_rdt: Add support for Cache Allocation detection Vikas Shivappa
2015-06-15 12:48   ` Peter Zijlstra
2015-06-15 17:04     ` Vikas Shivappa
2015-06-12 18:17 ` [PATCH 06/10] x86/intel_rdt: Add new cgroup and Class of service management Vikas Shivappa
2015-06-12 18:17 ` Vikas Shivappa [this message]
2015-06-12 18:17 ` [PATCH 08/10] x86/intel_rdt: Implement scheduling support for Intel RDT Vikas Shivappa
2015-06-12 18:17 ` [PATCH 09/10] x86/intel_rdt: Hot cpu support for Cache Allocation Vikas Shivappa
2015-06-16  8:52   ` Thomas Gleixner
2015-06-16 19:01     ` Vikas Shivappa
2015-06-16 19:25       ` Thomas Gleixner
2015-06-19 20:42         ` Vikas Shivappa
2015-06-12 18:17 ` [PATCH 10/10] x86/intel_rdt: Intel haswell Cache Allocation enumeration Vikas Shivappa
2015-06-15 14:05   ` Peter Zijlstra
2015-06-15 21:44     ` Vikas Shivappa
2015-06-16  8:23       ` Thomas Gleixner
2015-06-16  9:18       ` Peter Zijlstra
2015-06-17 16:23         ` Vikas Shivappa
  -- strict thread matches above, loose matches on Subject: below --
2015-06-23 22:56 [PATCH V10 00/10] New cpumask API and Intel Cache Allocation support Vikas Shivappa
2015-06-23 22:56 ` [PATCH 07/10] x86/intel_rdt: Add support for cache bit mask management Vikas Shivappa
2015-06-05  0:01 [PATCH V8 00/10] New cpumask API and Intel Cache Allocation support Vikas Shivappa
2015-06-05  0:01 ` [PATCH 07/10] x86/intel_rdt: Add support for cache bit mask management Vikas Shivappa
2015-06-03 19:09 [PATCH V8 00/10] New cpumask API and Intel Cache Allocation support Vikas Shivappa
2015-06-03 19:09 ` [PATCH 07/10] x86/intel_rdt: Add support for cache bit mask management Vikas Shivappa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1434133037-25189-8-git-send-email-vikas.shivappa@linux.intel.com \
    --to=vikas.shivappa@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdt@eclists.intel.com \
    --cc=matt.fleming@intel.com \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=vikas.shivappa@intel.com \
    --cc=will.auld@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).