[PATCH 4/7] x86/intel_rdt: Implement scheduling support for Intel RDT

From: Vikas Shivappa <vikas.shivappa@linux.intel.com>
To: vikas.shivappa@intel.com
Cc: x86@kernel.org, linux-kernel@vger.kernel.org, hpa@zytor.com,
	tglx@linutronix.de, mingo@kernel.org, tj@kernel.org,
	peterz@infradead.org, matt.fleming@intel.com,
	will.auld@intel.com, peter.zijlstra@intel.com,
	h.peter.anvin@intel.com, kanaka.d.juvva@intel.com,
	mtosatti@redhat.com, vikas.shivappa@linux.intel.com
Subject: [PATCH 4/7] x86/intel_rdt: Implement scheduling support for Intel RDT
Date: Mon, 11 May 2015 12:02:53 -0700	[thread overview]
Message-ID: <1431370976-31115-5-git-send-email-vikas.shivappa@linux.intel.com> (raw)
In-Reply-To: <1431370976-31115-1-git-send-email-vikas.shivappa@linux.intel.com>

Adds support for IA32_PQR_ASSOC MSR writes during task scheduling.
For Cache Allocation, MSR write would let the task fill in the cache
'subset' represented by the cgroup's cache_mask.

The high 32 bits in the per processor MSR IA32_PQR_ASSOC represents the
CLOSid. During context switch kernel implements this by writing the
CLOSid of the cgroup to which the task belongs to the CPU's
IA32_PQR_ASSOC MSR.

The following considerations are done for the PQR MSR write so that it
minimally impacts scheduler hot path:
- This path does not exist on any non-intel platforms.
- On Intel platforms, this would not exist by default unless CGROUP_RDT
is enabled.
- remains a no-op when CGROUP_RDT is enabled and intel SKU does not
support the feature.
- When feature is available and enabled, never does MSR write till the
user manually creates a cgroup directory *and* assigns a cache_mask
different from root cgroup directory.  Since the child node inherits the
parents cache mask , by cgroup creation there is no scheduling hot path
impact from the new cgroup.
- MSR write is only done when there is a task with different Closid is
scheduled on the CPU. Typically if the task groups are bound to be
scheduled on a set of CPUs , the number of MSR writes is greatly
reduced.
- For cgroup directories having same cache_mask the CLOSids are reused.
This minimizes the number of CLOSids used and hence reduces the MSR
write frequency.

Signed-off-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>
---
 arch/x86/include/asm/intel_rdt.h | 44 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/switch_to.h |  3 +++
 arch/x86/kernel/cpu/intel_rdt.c  | 30 +++++++++++++++++++++++++++
 3 files changed, 77 insertions(+)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 9e9dbbe..589394b 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -4,9 +4,15 @@
 #ifdef CONFIG_CGROUP_RDT
 
 #include <linux/cgroup.h>
+
+#define MSR_IA32_PQR_ASSOC		0xc8f
 #define MAX_CBM_LENGTH			32
 #define IA32_L3_CBM_BASE		0xc90
 #define CBM_FROM_INDEX(x)		(IA32_L3_CBM_BASE + x)
+DECLARE_PER_CPU(unsigned int, x86_cpu_clos);
+extern struct static_key rdt_enable_key;
+extern void __rdt_sched_in(void);
+
 
 struct rdt_subsys_info {
 	/* Clos Bitmap to keep track of available CLOSids.*/
@@ -24,6 +30,11 @@ struct clos_cbm_map {
 	unsigned int clos_refcnt;
 };
 
+static inline bool rdt_enabled(void)
+{
+	return static_key_false(&rdt_enable_key);
+}
+
 /*
  * Return rdt group corresponding to this container.
  */
@@ -37,5 +48,38 @@ static inline struct intel_rdt *parent_rdt(struct intel_rdt *ir)
 	return css_rdt(ir->css.parent);
 }
 
+/*
+ * Return rdt group to which this task belongs.
+ */
+static inline struct intel_rdt *task_rdt(struct task_struct *task)
+{
+	return css_rdt(task_css(task, intel_rdt_cgrp_id));
+}
+
+/*
+ * intel_rdt_sched_in() - Writes the task's CLOSid to IA32_PQR_MSR
+ *
+ * Following considerations are made so that this has minimal impact
+ * on scheduler hot path:
+ * - This will stay as no-op unless we are running on an Intel SKU
+ * which supports L3 cache allocation.
+ * - When support is present and enabled, does not do any
+ * IA32_PQR_MSR writes until the user starts really using the feature
+ * ie creates a rdt cgroup directory and assigns a cache_mask thats
+ * different from the root cgroup's cache_mask.
+ * - Closids are allocated so that different cgroup directories
+ * with same cache_mask gets the same CLOSid. This minimizes CLOSids
+ * used and reduces MSR write frequency.
+ */
+static inline void intel_rdt_sched_in(void)
+{
+	if (rdt_enabled())
+		__rdt_sched_in();
+}
+
+#else
+
+static inline void intel_rdt_sched_in(struct task_struct *task) {}
+
 #endif
 #endif
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 751bf4b..9149577 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -8,6 +8,9 @@ struct tss_struct;
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		      struct tss_struct *tss);
 
+#include <asm/intel_rdt.h>
+#define finish_arch_switch(prev)	intel_rdt_sched_in()
+
 #ifdef CONFIG_X86_32
 
 #ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 125318d..fe3ce4e 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -35,6 +35,8 @@ static struct clos_cbm_map *ccmap;
 static struct rdt_subsys_info rdtss_info;
 static DEFINE_MUTEX(rdt_group_mutex);
 struct intel_rdt rdt_root_group;
+struct static_key __read_mostly rdt_enable_key = STATIC_KEY_INIT_FALSE;
+DEFINE_PER_CPU(unsigned int, x86_cpu_clos);
 
 /*
  * Mask of CPUs for writing CBM values. We only need one per-socket.
@@ -79,6 +81,33 @@ static void intel_rdt_free_closid(unsigned int clos)
 	clear_bit(clos, rdtss_info.closmap);
 }
 
+void __rdt_sched_in(void)
+{
+	struct task_struct *task = current;
+	struct intel_rdt *ir;
+	unsigned int clos;
+
+	/*
+	 * This needs to be fixed
+	 * to cache the whole PQR instead of just CLOSid.
+	 * PQR has closid in high 32 bits and CQM-RMID in low 10 bits.
+	 * Should not write a 0 to the low 10 bits of PQR
+	 * and corrupt RMID.
+	 */
+	clos = this_cpu_read(x86_cpu_clos);
+
+	rcu_read_lock();
+	ir = task_rdt(task);
+	if (ir->clos == clos) {
+		rcu_read_unlock();
+		return;
+	}
+
+	wrmsr(MSR_IA32_PQR_ASSOC, 0, ir->clos);
+	this_cpu_write(x86_cpu_clos, ir->clos);
+	rcu_read_unlock();
+}
+
 static void __clos_get(unsigned int closid)
 {
 	struct clos_cbm_map *ccm = &ccmap[closid];
@@ -433,6 +462,7 @@ static int __init intel_rdt_late_init(void)
 	__hotcpu_notifier(intel_rdt_cpu_notifier, 0);
 
 	cpu_notifier_register_done();
+	static_key_slow_inc(&rdt_enable_key);
 
 	pr_info("Max bitmask length:%u,Max ClosIds: %u\n", max_cbm_len, maxid);
 out_err:
-- 
1.9.1