From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: 
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1753195AbcD2Eqw (ORCPT );
	Fri, 29 Apr 2016 00:46:52 -0400
Received: from mail-pa0-f53.google.com ([209.85.220.53]:34715 "EHLO
	mail-pa0-f53.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1753128AbcD2Eqp (ORCPT );
	Fri, 29 Apr 2016 00:46:45 -0400
From: David Carrillo-Cisneros
To: Peter Zijlstra, Alexander Shishkin, Arnaldo Carvalho de Melo,
	Ingo Molnar
Cc: Vikas Shivappa, Matt Fleming, Tony Luck, Stephane Eranian,
	Paul Turner, David Carrillo-Cisneros, x86@kernel.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH 26/32] perf/x86/intel/cqm: integrate CQM cgroups with scheduler
Date: Thu, 28 Apr 2016 21:43:32 -0700
Message-Id: <1461905018-86355-27-git-send-email-davidcc@google.com>
X-Mailer: git-send-email 2.8.0.rc3.226.g39d4020
In-Reply-To: <1461905018-86355-1-git-send-email-davidcc@google.com>
References: <1461905018-86355-1-git-send-email-davidcc@google.com>
Sender: linux-kernel-owner@vger.kernel.org
List-ID: 
X-Mailing-List: linux-kernel@vger.kernel.org

Allow monitored cgroups to update the PQR MSR during task switch even
without an associated perf_event.

The package RMID for the monr associated with the current task's
monitored cgroup is written to hw during task switch (after perf_event's
context switch has run) if perf_event did not write an RMID for an
event.

perf_event and any other caller of pqr_cache_update_rmid can update the
CPU's RMID using one of two modes:
  - PQR_RMID_MODE_NOEVENT: an RMID that does not correspond to an event,
    e.g. the RMID of the root pmonr when no event is scheduled.
  - PQR_RMID_MODE_EVENT: an RMID used by an event. Set during pmu::add,
    unset on pmu::del. This mode prevents an event's RMID from being
    overwritten by a non-event cgroup RMID.

This patch also introduces caching of writes to the PQR MSR within the
per-cpu pqr state variable. This interface for updating RMIDs and
CLOSIDs will also be used by upcoming versions of Intel's MBM and CAT
drivers.

Reviewed-by: Stephane Eranian
Signed-off-by: David Carrillo-Cisneros
---
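A minimal sketch (not part of the patch) of how the new interface is
expected to be driven at context switch. The hook shown here as
sched_in_hook() is hypothetical; only pqr_cache_update_rmid(),
pqr_update() and the PQR_RMID_MODE_* values come from this series, and
the real call site is presumably added elsewhere in it.

	#include <asm/pqr_common.h>

	/* Hypothetical context-switch hook, for illustration only. */
	static void sched_in_hook(void)
	{
		/*
		 * The perf_event context switch runs first. If a CQM
		 * event was scheduled in, pmu::add has already cached
		 * its RMID via pqr_cache_update_rmid(rmid,
		 * PQR_RMID_MODE_EVENT). Otherwise the mode is still
		 * PQR_RMID_MODE_NOEVENT and __pqr_update() falls back
		 * to the RMID of the current task's monitored cgroup
		 * (or to the root pmonr's RMID).
		 */
		pqr_update();

		/*
		 * pqr_update() is a no-op while pqr_common_enable_key
		 * is off, and the wrmsr is elided when neither rmid
		 * nor closid changed since the last write.
		 */
	}
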
 arch/x86/events/intel/cqm.c       | 65 +++++++++++++++++++++++++++++----------
 arch/x86/events/intel/cqm.h       |  2 --
 arch/x86/include/asm/pqr_common.h | 53 +++++++++++++++++++++++++++----
 arch/x86/kernel/cpu/pqr_common.c  | 46 +++++++++++++++++++++++----
 4 files changed, 135 insertions(+), 31 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index daf9fdf..4ece0a4 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -198,19 +198,6 @@ static inline int cqm_prmid_update(struct prmid *prmid)
 	return __cqm_prmid_update(prmid, __rmid_min_update_time);
 }
 
-/*
- * Updates caller cpu's cache.
- */
-static inline void __update_pqr_prmid(struct prmid *prmid)
-{
-	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-
-	if (state->rmid == prmid->rmid)
-		return;
-	state->rmid = prmid->rmid;
-	wrmsr(MSR_IA32_PQR_ASSOC, prmid->rmid, state->closid);
-}
-
 static inline bool __valid_pkg_id(u16 pkg_id)
 {
 	return pkg_id < PQR_MAX_NR_PKGS;
@@ -2531,12 +2518,11 @@ static inline bool cqm_group_leader(struct perf_event *event)
 static inline void __intel_cqm_event_start(
 	struct perf_event *event, union prmid_summary summary)
 {
-	u16 pkg_id = topology_physical_package_id(smp_processor_id());
 	if (!(event->hw.state & PERF_HES_STOPPED))
 		return;
-
 	event->hw.state &= ~PERF_HES_STOPPED;
-	__update_pqr_prmid(__prmid_from_rmid(pkg_id, summary.sched_rmid));
+
+	pqr_cache_update_rmid(summary.sched_rmid, PQR_RMID_MODE_EVENT);
 }
 
 static void intel_cqm_event_start(struct perf_event *event, int mode)
@@ -2566,7 +2552,7 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode)
 	/* Occupancy of CQM events is obtained at read. No need to read
 	 * when event is stopped since read on inactive cpus succeed.
 	 */
-	__update_pqr_prmid(__prmid_from_rmid(pkg_id, summary.sched_rmid));
+	pqr_cache_update_rmid(summary.sched_rmid, PQR_RMID_MODE_NOEVENT);
 }
 
 static int intel_cqm_event_add(struct perf_event *event, int mode)
@@ -2977,6 +2963,8 @@ static void intel_cqm_cpu_starting(unsigned int cpu)
 	state->rmid = 0;
 	state->closid = 0;
+	state->next_rmid = 0;
+	state->next_closid = 0;
 
 	/* XXX: lock */
 	/* XXX: Make sure this case is handled when hotplug happens. */
 
@@ -3152,6 +3140,12 @@ static int __init intel_cqm_init(void)
 	pr_info("Intel CQM monitoring enabled with at least %u rmids per package.\n",
 		min_max_rmid + 1);
 
+	/* Make sure pqr_common_enable_key is enabled after
+	 * cqm_initialized_key.
+	 */
+	barrier();
+
+	static_branch_enable(&pqr_common_enable_key);
 	return ret;
 
 error_init_mutex:
@@ -3163,4 +3157,41 @@ error:
 	return ret;
 }
 
+/* Schedule task without a CQM perf_event. */
+inline void __intel_cqm_no_event_sched_in(void)
+{
+#ifdef CONFIG_CGROUP_PERF
+	struct monr *monr;
+	struct pmonr *pmonr;
+	union prmid_summary summary;
+	u16 pkg_id = topology_physical_package_id(smp_processor_id());
+	struct pmonr *root_pmonr = monr_hrchy_root->pmonrs[pkg_id];
+
+	/* Assume CQM is likely enabled given that PQR is enabled. */
+	if (!static_branch_likely(&cqm_initialized_key))
+		return;
+
+	/* Safe to call from_task since we are in scheduler lock. */
+	monr = monr_from_perf_cgroup(perf_cgroup_from_task(current, NULL));
+	pmonr = monr->pmonrs[pkg_id];
+
+	/* Use the most up-to-date pmonr summary. */
+	monr_hrchy_get_next_prmid_summary(pmonr);
+	summary.value = atomic64_read(&pmonr->prmid_summary_atomic);
+
+	if (!prmid_summary__is_mon_active(summary))
+		goto no_rmid;
+
+	if (WARN_ON_ONCE(!__valid_rmid(pkg_id, summary.sched_rmid)))
+		goto no_rmid;
+
+	pqr_cache_update_rmid(summary.sched_rmid, PQR_RMID_MODE_NOEVENT);
+	return;
+
+no_rmid:
+	summary.value = atomic64_read(&root_pmonr->prmid_summary_atomic);
+	pqr_cache_update_rmid(summary.sched_rmid, PQR_RMID_MODE_NOEVENT);
+#endif
+}
+
 device_initcall(intel_cqm_init);
diff --git a/arch/x86/events/intel/cqm.h b/arch/x86/events/intel/cqm.h
index 0f3da94..e1f8bd0 100644
--- a/arch/x86/events/intel/cqm.h
+++ b/arch/x86/events/intel/cqm.h
@@ -82,8 +82,6 @@ union prmid_summary {
 	};
 };
 
-# define INVALID_RMID (-1)
-
 /* A pmonr in (U)state has no sched_rmid, read_rmid can be 0 or INVALID_RMID
  * depending on whether monitoring is active or not.
 */
diff --git a/arch/x86/include/asm/pqr_common.h b/arch/x86/include/asm/pqr_common.h
index f770637..abbb235 100644
--- a/arch/x86/include/asm/pqr_common.h
+++ b/arch/x86/include/asm/pqr_common.h
@@ -3,31 +3,72 @@
 
 #if defined(CONFIG_INTEL_RDT)
 
+#include 
 #include 
 #include 
+#include 
 
 #define MSR_IA32_PQR_ASSOC 0x0c8f
 
+#define INVALID_RMID (-1)
+#define INVALID_CLOSID (-1)
+
+
+extern struct static_key_false pqr_common_enable_key;
+
+enum intel_pqr_rmid_mode {
+	/* RMID has no perf_event associated. */
+	PQR_RMID_MODE_NOEVENT = 0,
+	/* RMID has a perf_event associated. */
+	PQR_RMID_MODE_EVENT
+};
 /**
  * struct intel_pqr_state - State cache for the PQR MSR
- * @rmid:		The cached Resource Monitoring ID
- * @closid:		The cached Class Of Service ID
+ * @rmid:		Last rmid written to hw.
+ * @next_rmid:		Next rmid to write to hw.
+ * @next_rmid_mode:	Next rmid's mode.
+ * @closid:		The current Class Of Service ID.
+ * @next_closid:	The Class Of Service ID to use.
  *
  * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
  * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
  * contains both parts, so we need to cache them.
  *
- * The cache also helps to avoid pointless updates if the value does
- * not change.
+ * The cache also helps to avoid pointless updates if the value does not
+ * change. It also keeps track of the type of RMID set (event vs no event),
+ * used to determine when a cgroup RMID is required.
  */
 struct intel_pqr_state {
-	u32				rmid;
-	u32				closid;
+	u32				rmid;
+	u32				next_rmid;
+	enum intel_pqr_rmid_mode	next_rmid_mode;
+	u32				closid;
+	u32				next_closid;
 };
 
 DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
 
 #define PQR_MAX_NR_PKGS 8
 
+void __pqr_update(void);
+
+inline void __intel_cqm_no_event_sched_in(void);
+
+inline void pqr_cache_update_rmid(u32 rmid, enum intel_pqr_rmid_mode mode);
+
+inline void pqr_cache_update_closid(u32 closid);
+
+static inline void pqr_update(void)
+{
+	if (static_branch_unlikely(&pqr_common_enable_key))
+		__pqr_update();
+}
+
+#else
+
+static inline void pqr_update(void)
+{
+}
+
 #endif
 #endif
diff --git a/arch/x86/kernel/cpu/pqr_common.c b/arch/x86/kernel/cpu/pqr_common.c
index 9eff5d9..d91c127 100644
--- a/arch/x86/kernel/cpu/pqr_common.c
+++ b/arch/x86/kernel/cpu/pqr_common.c
@@ -1,9 +1,43 @@
 #include 
 
-/*
- * The cached intel_pqr_state is strictly per CPU and can never be
- * updated from a remote CPU. Both functions which modify the state
- * (intel_cqm_event_start and intel_cqm_event_stop) are called with
- * interrupts disabled, which is sufficient for the protection.
- */
 DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
+
+DEFINE_STATIC_KEY_FALSE(pqr_common_enable_key);
+
+inline void pqr_cache_update_rmid(u32 rmid, enum intel_pqr_rmid_mode mode)
+{
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+
+	state->next_rmid_mode = mode;
+	state->next_rmid = rmid;
+}
+
+inline void pqr_cache_update_closid(u32 closid)
+{
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+
+	state->next_closid = closid;
+}
+
+/* Update hw's RMID with the cgroup's RMID if perf_event did not set one.
+ * Sync the pqr cache with the MSR.
+ */
+inline void __pqr_update(void)
+{
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+
+	/* If perf_event has set a next_rmid that is in use, do not try
+	 * to obtain another one from the current task.
+	 */
+	if (state->next_rmid_mode == PQR_RMID_MODE_NOEVENT)
+		__intel_cqm_no_event_sched_in();
+
+	/* __intel_cqm_no_event_sched_in might have changed next_rmid.
+	 */
+	if (state->rmid == state->next_rmid &&
+	    state->closid == state->next_closid)
+		return;
+
+	state->rmid = state->next_rmid;
+	state->closid = state->next_closid;
+	wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, state->closid);
+}
-- 
2.8.0.rc3.226.g39d4020
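
A side note on the MSR write above (illustrative, not part of the
patch): wrmsr(msr, lo, hi) writes EDX:EAX, so with the layout from the
intel_pqr_state kernel-doc (closid in the upper 32 bits, RMID in the
lower bits) the value that reaches IA32_PQR_ASSOC is the one composed
by this standalone helper:

	#include <stdint.h>

	/* The 64-bit value produced by
	 * wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, state->closid).
	 */
	static inline uint64_t pqr_assoc_value(uint32_t rmid, uint32_t closid)
	{
		return ((uint64_t)closid << 32) | rmid;
	}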