From: Vikas Shivappa <vikas.shivappa@linux.intel.com>
To: vikas.shivappa@intel.com
Cc: x86@kernel.org, linux-kernel@vger.kernel.org, hpa@zytor.com,
	tglx@linutronix.de, mingo@kernel.org, tj@kernel.org,
	peterz@infradead.org, matt.fleming@intel.com,
	will.auld@intel.com, peter.zijlstra@intel.com,
	h.peter.anvin@intel.com, kanaka.d.juvva@intel.com,
	mtosatti@redhat.com, vikas.shivappa@linux.intel.com
Subject: [PATCH 5/7] x86/intel_rdt: Software Cache for IA32_PQR_MSR
Date: Mon, 11 May 2015 12:02:54 -0700
Message-ID: <1431370976-31115-6-git-send-email-vikas.shivappa@linux.intel.com>
In-Reply-To: <1431370976-31115-1-git-send-email-vikas.shivappa@linux.intel.com>

This patch implements a common software cache for IA32_PQR_MSR (RMID 0:9,
CLOSid 32:63) to be used by both Cache Monitoring (CMT) and Cache
Allocation. CMT updates the RMID, whereas cache allocation updates the
CLOSid in the software cache. During scheduling, IA32_PQR_MSR is written
only when the new RMID/CLOSid value differs from the cached values. Since
the measured rdmsr latency for IA32_PQR_MSR is very high (~250 cycles),
the software cache is necessary to avoid reading the MSR just to compare
against the current CLOSid value. Caching also reduces the frequency of
MSR writes in the scheduler hot path for cache allocation. During CPU
hotplug the pqr cache is reset to zero.
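
For illustration, the caching idea boils down to the sketch below. It
reuses the struct and kernel helpers the patch introduces; the function
name pqr_update_closid is hypothetical and only stands in for the real
hooks, __rdt_sched_in() and the CQM event start/stop paths in the diff
that follows.

#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <asm/msr.h>

#define MSR_IA32_PQR_ASSOC	0x0c8f

struct intel_pqr_state {
	raw_spinlock_t	lock;
	int		rmid;	/* cached RMID, low 10 bits of IA32_PQR_MSR */
	int		clos;	/* cached CLOSid, high 32 bits */
	int		cnt;
};

static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);

/* Hypothetical helper: write IA32_PQR_MSR only on a CLOSid change. */
static void pqr_update_closid(int new_clos)
{
	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
	unsigned long flags;

	raw_spin_lock_irqsave(&state->lock, flags);
	if (state->clos != new_clos) {
		/* Keep the cached RMID so CMT state is not clobbered. */
		wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, new_clos);
		state->clos = new_clos;
	}
	raw_spin_unlock_irqrestore(&state->lock, flags);
}

In the patch itself the same per-cpu pqr_state is shared with
perf_event_intel_cqm.c, which likewise preserves the cached CLOSid when
it rewrites only the RMID.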

Signed-off-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>

Conflicts:
	arch/x86/kernel/cpu/perf_event_intel_cqm.c
---
 arch/x86/include/asm/intel_rdt.h           |  9 ++++++---
 arch/x86/include/asm/rdt_common.h          | 13 +++++++++++++
 arch/x86/kernel/cpu/intel_rdt.c            | 30 +++++++++++++++++-------------
 arch/x86/kernel/cpu/perf_event_intel_cqm.c | 20 +++++++-------------
 4 files changed, 43 insertions(+), 29 deletions(-)
 create mode 100644 arch/x86/include/asm/rdt_common.h

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 589394b..f4372d8 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -4,16 +4,16 @@
 #ifdef CONFIG_CGROUP_RDT
 
 #include <linux/cgroup.h>
+#include <asm/rdt_common.h>
 
-#define MSR_IA32_PQR_ASSOC		0xc8f
 #define MAX_CBM_LENGTH			32
 #define IA32_L3_CBM_BASE		0xc90
 #define CBM_FROM_INDEX(x)		(IA32_L3_CBM_BASE + x)
-DECLARE_PER_CPU(unsigned int, x86_cpu_clos);
+
+DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
 extern struct static_key rdt_enable_key;
 extern void __rdt_sched_in(void);
 
-
 struct rdt_subsys_info {
 	/* Clos Bitmap to keep track of available CLOSids.*/
 	unsigned long *closmap;
@@ -67,6 +67,9 @@ static inline struct intel_rdt *task_rdt(struct task_struct *task)
  * IA32_PQR_MSR writes until the user starts really using the feature
  * ie creates a rdt cgroup directory and assigns a cache_mask thats
  * different from the root cgroup's cache_mask.
+ * - Caches the per cpu CLOSid values and does the MSR write only
+ * when a task with a different CLOSid is scheduled in. That
+ * means the task belongs to a different cgroup.
  * - Closids are allocated so that different cgroup directories
  * with same cache_mask gets the same CLOSid. This minimizes CLOSids
  * used and reduces MSR write frequency.
diff --git a/arch/x86/include/asm/rdt_common.h b/arch/x86/include/asm/rdt_common.h
new file mode 100644
index 0000000..33fd8ea
--- /dev/null
+++ b/arch/x86/include/asm/rdt_common.h
@@ -0,0 +1,13 @@
+#ifndef _X86_RDT_H_
+#define _X86_RDT_H_
+
+#define MSR_IA32_PQR_ASSOC	0x0c8f
+
+struct intel_pqr_state {
+	raw_spinlock_t	lock;
+	int		rmid;
+	int		clos;
+	int		cnt;
+};
+
+#endif
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index fe3ce4e..2415965 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -85,27 +85,28 @@ void __rdt_sched_in(void)
 {
 	struct task_struct *task = current;
 	struct intel_rdt *ir;
-	unsigned int clos;
-
-	/*
-	 * This needs to be fixed
-	 * to cache the whole PQR instead of just CLOSid.
-	 * PQR has closid in high 32 bits and CQM-RMID in low 10 bits.
-	 * Should not write a 0 to the low 10 bits of PQR
-	 * and corrupt RMID.
-	 */
-	clos = this_cpu_read(x86_cpu_clos);
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+	unsigned long flags;
 
+	raw_spin_lock_irqsave(&state->lock, flags);
 	rcu_read_lock();
 	ir = task_rdt(task);
-	if (ir->clos == clos) {
+	if (ir->clos == state->clos) {
 		rcu_read_unlock();
+		raw_spin_unlock_irqrestore(&state->lock, flags);
 		return;
 	}
 
-	wrmsr(MSR_IA32_PQR_ASSOC, 0, ir->clos);
-	this_cpu_write(x86_cpu_clos, ir->clos);
+	/*
+	 * PQR has closid in high 32 bits and CQM-RMID
+	 * in low 10 bits. Rewrite the existing rmid from
+	 * the software cache.
+	 */
+	wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, ir->clos);
+	state->clos = ir->clos;
 	rcu_read_unlock();
+	raw_spin_unlock_irqrestore(&state->lock, flags);
+
 }
 
 static void __clos_get(unsigned int closid)
@@ -372,6 +373,9 @@ static inline bool intel_rdt_update_cpumask(int cpu)
  */
 static inline void intel_rdt_cpu_start(int cpu)
 {
+	struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
+
+	state->clos = 0;
 	mutex_lock(&rdt_group_mutex);
 	if (intel_rdt_update_cpumask(cpu))
 		cbm_update_msrs(cpu);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index e4d1b8b..fd039899 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -7,22 +7,16 @@
 #include <linux/perf_event.h>
 #include <linux/slab.h>
 #include <asm/cpu_device_id.h>
+#include <asm/rdt_common.h>
 #include "perf_event.h"
 
-#define MSR_IA32_PQR_ASSOC	0x0c8f
 #define MSR_IA32_QM_CTR		0x0c8e
 #define MSR_IA32_QM_EVTSEL	0x0c8d
 
 static unsigned int cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 
-struct intel_cqm_state {
-	raw_spinlock_t		lock;
-	int			rmid;
-	int			cnt;
-};
-
-static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state);
+DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
 
 /*
  * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
@@ -961,7 +955,7 @@ out:
 
 static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
-	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
 	unsigned int rmid = event->hw.cqm_rmid;
 	unsigned long flags;
 
@@ -978,14 +972,14 @@ static void intel_cqm_event_start(struct perf_event *event, int mode)
 		WARN_ON_ONCE(state->rmid);
 
 	state->rmid = rmid;
-	wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid);
+	wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, state->clos);
 
 	raw_spin_unlock_irqrestore(&state->lock, flags);
 }
 
 static void intel_cqm_event_stop(struct perf_event *event, int mode)
 {
-	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
 	unsigned long flags;
 
 	if (event->hw.cqm_state & PERF_HES_STOPPED)
@@ -998,7 +992,7 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode)
 
 	if (!--state->cnt) {
 		state->rmid = 0;
-		wrmsrl(MSR_IA32_PQR_ASSOC, 0);
+		wrmsr(MSR_IA32_PQR_ASSOC, 0, state->clos);
 	} else {
 		WARN_ON_ONCE(!state->rmid);
 	}
@@ -1243,7 +1237,7 @@ static inline void cqm_pick_event_reader(int cpu)
 
 static void intel_cqm_cpu_prepare(unsigned int cpu)
 {
-	struct intel_cqm_state *state = &per_cpu(cqm_state, cpu);
+	struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	raw_spin_lock_init(&state->lock);
-- 
1.9.1

