linux-kernel.vger.kernel.org archive mirror
From: Vikas Shivappa <vikas.shivappa@linux.intel.com>
To: linux-kernel@vger.kernel.org
Cc: vikas.shivappa@intel.com, x86@kernel.org, hpa@zytor.com,
	tglx@linutronix.de, mingo@kernel.org, tj@kernel.org,
	peterz@infradead.org, matt.fleming@intel.com,
	will.auld@intel.com, kanaka.d.juvva@intel.com,
	vikas.shivappa@linux.intel.com
Subject: [PATCH 07/10] x86/intel_rdt: Add support for cache bit mask management
Date: Wed,  3 Jun 2015 12:09:58 -0700
Message-ID: <1433358601-20255-8-git-send-email-vikas.shivappa@linux.intel.com>
In-Reply-To: <1433358601-20255-1-git-send-email-vikas.shivappa@linux.intel.com>

The change adds a cache_mask file to the RDT cgroup, which represents the
cache bit mask (CBM) for the cgroup. cache_mask is specific to the Cache
Allocation sub-feature of RDT. Tasks in an RDT cgroup are allowed to fill
only the portion of the L3 cache represented by the cgroup's cache_mask.
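
As a rough illustration (not part of this patch; the cgroup mount point,
group name and the "intel_rdt." file prefix below are assumptions), a
task placed in an RDT cgroup could be confined to the portion of the L3
covered by the low four CBM bits by writing a contiguous mask to
cache_mask:

	/* Hypothetical user-space sketch; path and file prefix are assumed. */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/fs/cgroup/intel_rdt/group1/intel_rdt.cache_mask", "w");

		if (!f)
			return 1;
		/* CBM must be non-zero, contiguous and within the CPU's max CBM length */
		fprintf(f, "%#x\n", 0xf);
		return fclose(f) ? 1 : 0;
	}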

The CBM is updated by writing to the IA32_L3_MASK_n MSRs. The RDT
cgroup follows the cgroup hierarchy; mkdir and adding tasks to the
cgroup never fail. When a child cgroup is created, it inherits the
CLOSid and the cache_mask from its parent. When a user changes the
default CBM for a cgroup, a new CLOSid may be allocated if the
cache_mask was not used before. If the new CBM is already in use, the
reference count for that CLOSid<->CBM mapping is incremented. Changing
'cache_mask' may fail with -ENOSPC once the kernel runs out of CLOSids.

Users can create as many cgroups as they want, but the number of
different CBMs in use at the same time is limited by the maximum number
of CLOSids. The kernel maintains a CLOSid<->CBM mapping which keeps a
count of the cgroups using each CLOSid.
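
For reference, a minimal sketch of that mapping as it is used in the
diff below (the actual struct clos_cbm_map is introduced earlier in the
series; the field names here simply follow their use in this patch):

	struct clos_cbm_map {
		unsigned long cache_mask;	/* CBM written to IA32_L3_MASK_n for this CLOSid */
		unsigned int clos_refcnt;	/* number of cgroups sharing this CLOSid */
	};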

Reusing CLOSids for cgroups with the same bitmask has the following
advantages:
 - The scarce CLOSids are used optimally.
 - During a context switch, the PQR MSR is written only when a task
 with a different bitmask is scheduled in (see the sketch after this
 list).
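
A hedged sketch of the second point (the real scheduling hook is added
by a later patch in this series; the per-CPU caching of the CLOSid and
the helper name are assumptions, the MSR layout is per the SDM):

	#define MSR_IA32_PQR_ASSOC	0x0c8f

	static DEFINE_PER_CPU(unsigned int, cached_closid);

	static inline void pqr_switch_sketch(unsigned int next_closid)
	{
		if (this_cpu_read(cached_closid) == next_closid)
			return;		/* same CBM/CLOSid: skip the MSR write */

		this_cpu_write(cached_closid, next_closid);
		/* the CLOSid lives in the upper 32 bits of IA32_PQR_ASSOC */
		wrmsr(MSR_IA32_PQR_ASSOC, 0, next_closid);
	}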

Signed-off-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>
---
Changes as per Thomas's feedback:

- Changed function names so that only external APIs carry the intel_
  prefix.
- Replaced (void *)&closid with (void *)closid when calling
  on_each_cpu_mask.
- Fixed the release of the closid reference during the cache bitmask
  write.
- Changed the code to no longer ignore a cache mask which has bits set
  outside of the maximum allowed; an error is returned instead.
- Replaced bitmap_set(&max_mask, 0, max_cbm_len) with max_mask =
  (1ULL << max_cbm) - 1 (a small equivalence check follows).
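
Purely illustrative check of that last change (not part of the patch):
for a CBM length of 4, both forms yield a mask with the low four bits set:

	u64 max_mask = (1ULL << 4) - 1;	/* 0xf, the same bits bitmap_set(&max_mask, 0, 4) sets */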

 arch/x86/include/asm/intel_rdt.h |   3 +
 arch/x86/kernel/cpu/intel_rdt.c  | 201 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 203 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 147c9cf..ba4601f 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -4,6 +4,9 @@
 #ifdef CONFIG_CGROUP_RDT
 
 #include <linux/cgroup.h>
+#define MAX_CBM_LENGTH			32
+#define IA32_L3_CBM_BASE		0xc90
+#define CBM_FROM_INDEX(x)		(IA32_L3_CBM_BASE + x)
 
 struct rdt_subsys_info {
 	unsigned long *closmap;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 7d455b9..f857381 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -34,6 +34,13 @@ static struct clos_cbm_map *ccmap;
 static struct rdt_subsys_info rdtss_info;
 static DEFINE_MUTEX(rdt_group_mutex);
 struct intel_rdt rdt_root_group;
+/*
+ * Mask of CPUs for writing CBM values. We only need one CPU per-socket.
+ */
+static cpumask_t rdt_cpumask;
+
+#define rdt_for_each_child(pos_css, parent_ir)		\
+	css_for_each_child((pos_css), &(parent_ir)->css)
 
 static inline void clos_get(unsigned int closid)
 {
@@ -116,11 +123,189 @@ static void intel_rdt_css_free(struct cgroup_subsys_state *css)
 	mutex_unlock(&rdt_group_mutex);
 }
 
+static int intel_cache_alloc_cbm_read(struct seq_file *m, void *v)
+{
+	struct intel_rdt *ir = css_rdt(seq_css(m));
+
+	seq_printf(m, "%08lx\n", ccmap[ir->clos].cache_mask);
+
+	return 0;
+}
+
+static inline bool cbm_is_contiguous(unsigned long var)
+{
+	unsigned long maxcbm = MAX_CBM_LENGTH;
+	unsigned long first_bit, zero_bit;
+
+	if (!var)
+		return false;
+
+	first_bit = find_next_bit(&var, maxcbm, 0);
+	zero_bit = find_next_zero_bit(&var, maxcbm, first_bit);
+
+	if (find_next_bit(&var, maxcbm, zero_bit) < maxcbm)
+		return false;
+
+	return true;
+}
+
+static int cbm_validate(struct intel_rdt *ir, unsigned long cbmvalue)
+{
+	struct cgroup_subsys_state *css;
+	struct intel_rdt *par, *c;
+	unsigned long *cbm_tmp;
+	int err = 0;
+
+	if (!cbm_is_contiguous(cbmvalue)) {
+		pr_err("bitmask should have >= 1 bit and be contiguous\n");
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	par = parent_rdt(ir);
+	cbm_tmp = &ccmap[par->clos].cache_mask;
+	if (!bitmap_subset(&cbmvalue, cbm_tmp, MAX_CBM_LENGTH)) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	rcu_read_lock();
+	rdt_for_each_child(css, ir) {
+		c = css_rdt(css);
+		cbm_tmp = &ccmap[c->clos].cache_mask;
+		if (!bitmap_subset(cbm_tmp, &cbmvalue, MAX_CBM_LENGTH)) {
+			rcu_read_unlock();
+			pr_err("Children's mask not a subset\n");
+			err = -EINVAL;
+			goto out_err;
+		}
+	}
+	rcu_read_unlock();
+out_err:
+
+	return err;
+}
+
+static bool cbm_search(unsigned long cbm, int *closid)
+{
+	int maxid = boot_cpu_data.x86_rdt_max_closid;
+	unsigned int i;
+
+	for (i = 0; i < maxid; i++) {
+		if (bitmap_equal(&cbm, &ccmap[i].cache_mask, MAX_CBM_LENGTH)) {
+			*closid = i;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static void closcbm_map_dump(void)
+{
+	int i;
+
+	pr_debug("CBMMAP\n");
+	for (i = 0; i < boot_cpu_data.x86_rdt_max_closid; i++) {
+		pr_debug("cache_mask: 0x%x,clos_refcnt: %u\n",
+		 (unsigned int)ccmap[i].cache_mask, ccmap[i].clos_refcnt);
+	}
+}
+
+static void cbm_cpu_update(void *info)
+{
+	unsigned int closid = (unsigned int) info;
+
+	wrmsrl(CBM_FROM_INDEX(closid), ccmap[closid].cache_mask);
+}
+
+/*
+ * cbm_update_all() - Update the cache bit mask for all packages.
+ */
+static inline void cbm_update_all(unsigned int closid)
+{
+	on_each_cpu_mask(&rdt_cpumask, cbm_cpu_update, (void *)closid, 1);
+}
+
+/*
+ * intel_cache_alloc_cbm_write() - Validates and writes the
+ * cache bit mask(cbm) to the IA32_L3_MASK_n
+ * and also store the same in the ccmap.
+ *
+ * CLOSids are reused for cgroups which have same bitmask.
+ * This helps to use the scant CLOSids optimally. This also
+ * implies that at context switch write to PQR-MSR is done
+ * only when a task with a different bitmask is scheduled in.
+ */
+static int intel_cache_alloc_cbm_write(struct cgroup_subsys_state *css,
+				 struct cftype *cft, u64 cbmvalue)
+{
+	u32 max_cbm = boot_cpu_data.x86_rdt_max_cbm_len;
+	struct intel_rdt *ir = css_rdt(css);
+	unsigned int closid;
+	ssize_t err = 0;
+	u64 max_mask;
+
+	if (ir == &rdt_root_group)
+		return -EPERM;
+
+	/*
+	 * Need global mutex as cbm write may allocate a closid.
+	 */
+	mutex_lock(&rdt_group_mutex);
+
+	max_mask = (1ULL << max_cbm) - 1;
+	if (cbmvalue & ~max_mask) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (cbmvalue == ccmap[ir->clos].cache_mask)
+		goto out;
+
+	err = cbm_validate(ir, cbmvalue);
+	if (err)
+		goto out;
+
+	/*
+	 * Try to get a reference for a different CLOSid and release the
+	 * reference to the current CLOSid.
+	 */
+	if (cbm_search(cbmvalue, &closid)) {
+		clos_put(ir->clos);
+		ir->clos = closid;
+		clos_get(closid);
+	} else {
+		closid = ir->clos;
+		err = clos_alloc(ir);
+		if (err)
+			goto out;
+
+		clos_put(closid);
+		ccmap[ir->clos].cache_mask = cbmvalue;
+		cbm_update_all(ir->clos);
+	}
+	closcbm_map_dump();
+out:
+	mutex_unlock(&rdt_group_mutex);
+
+	return err;
+}
+
+static inline void rdt_cpumask_update(int cpu)
+{
+	cpumask_t tmp;
+
+	cpumask_and(&tmp, &rdt_cpumask, topology_core_cpumask(cpu));
+	if (cpumask_empty(&tmp))
+		cpumask_set_cpu(cpu, &rdt_cpumask);
+}
+
 static int __init intel_rdt_late_init(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
+	int maxid, max_cbm_len, err = 0, i;
 	static struct clos_cbm_map *ccm;
-	int maxid, max_cbm_len, err = 0;
 	size_t sizeb;
 
 	if (!cpu_has(c, X86_FEATURE_CAT_L3)) {
@@ -151,6 +336,9 @@ static int __init intel_rdt_late_init(void)
 	ccm->cache_mask = (1ULL << max_cbm_len) - 1;
 	ccm->clos_refcnt = 1;
 
+	for_each_online_cpu(i)
+		rdt_cpumask_update(i);
+
 	pr_info("Intel cache allocation enabled\n");
 out_err:
 
@@ -159,8 +347,19 @@ out_err:
 
 late_initcall(intel_rdt_late_init);
 
+static struct cftype rdt_files[] = {
+	{
+		.name		= "cache_mask",
+		.seq_show	= intel_cache_alloc_cbm_read,
+		.write_u64	= intel_cache_alloc_cbm_write,
+		.mode		= 0666,
+	},
+	{ }	/* terminate */
+};
+
 struct cgroup_subsys intel_rdt_cgrp_subsys = {
 	.css_alloc		= intel_rdt_css_alloc,
 	.css_free		= intel_rdt_css_free,
+	.legacy_cftypes	= rdt_files,
 	.early_init		= 0,
 };
-- 
1.9.1



Thread overview: 16+ messages
2015-06-03 19:09 [PATCH V8 00/10] New cpumask API and Intel Cache Allocation support Vikas Shivappa
2015-06-03 19:09 ` [PATCH 01/10] cpumask: Introduce cpumask_any_online_but Vikas Shivappa
2015-06-03 19:09 ` [PATCH 02/10] x86/intel_cqm: Modify hot cpu notification handling Vikas Shivappa
2015-06-03 19:09 ` [PATCH 03/10] x86/intel_rapl: Modify hot cpu notification handling for RAPL Vikas Shivappa
2015-06-03 19:09 ` [PATCH 04/10] x86/intel_rdt: Cache Allocation documentation and cgroup usage guide Vikas Shivappa
2015-06-03 19:09 ` [PATCH 05/10] x86/intel_rdt: Add support for Cache Allocation detection Vikas Shivappa
2015-06-03 19:09 ` [PATCH 06/10] x86/intel_rdt: Add new cgroup and Class of service management Vikas Shivappa
2015-06-03 19:09 ` Vikas Shivappa [this message]
2015-06-03 19:09 ` [PATCH 08/10] x86/intel_rdt: Implement scheduling support for Intel RDT Vikas Shivappa
2015-06-03 19:10 ` [PATCH 09/10] x86/intel_rdt: Hot cpu support for Cache Allocation Vikas Shivappa
2015-06-03 19:10 ` [PATCH 10/10] x86/intel_rdt: Intel haswell Cache Allocation enumeration Vikas Shivappa
2015-06-03 22:38 ` [PATCH V8 00/10] New cpumask API and Intel Cache Allocation support Thomas Gleixner
2015-06-03 22:52   ` Vikas Shivappa
2015-06-05  0:01 Vikas Shivappa
2015-06-05  0:01 ` [PATCH 07/10] x86/intel_rdt: Add support for cache bit mask management Vikas Shivappa
2015-06-12 18:17 [PATCH V9 00/10] New cpumask API and Intel Cache Allocation support Vikas Shivappa
2015-06-12 18:17 ` [PATCH 07/10] x86/intel_rdt: Add support for cache bit mask management Vikas Shivappa
2015-06-23 22:56 [PATCH V10 00/10] New cpumask API and Intel Cache Allocation support Vikas Shivappa
2015-06-23 22:56 ` [PATCH 07/10] x86/intel_rdt: Add support for cache bit mask management Vikas Shivappa
