From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: 
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1756805AbcJ3Al5 (ORCPT );
	Sat, 29 Oct 2016 20:41:57 -0400
Received: from mail-pf0-f176.google.com ([209.85.192.176]:36399 "EHLO
	mail-pf0-f176.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1756431AbcJ3Akh (ORCPT );
	Sat, 29 Oct 2016 20:40:37 -0400
From: David Carrillo-Cisneros 
To: linux-kernel@vger.kernel.org
Cc: "x86@kernel.org" , Ingo Molnar , Thomas Gleixner ,
	Andi Kleen , Kan Liang , Peter Zijlstra , Vegard Nossum ,
	Marcelo Tosatti , Nilay Vaish , Borislav Petkov ,
	Vikas Shivappa , Ravi V Shankar , Fenghua Yu , Paul Turner ,
	Stephane Eranian , David Carrillo-Cisneros 
Subject: [PATCH v3 41/46] perf/x86/intel/cmt: add rotation minimum progress SLO
Date: Sat, 29 Oct 2016 17:38:38 -0700
Message-Id: <1477787923-61185-42-git-send-email-davidcc@google.com>
X-Mailer: git-send-email 2.8.0.rc3.226.g39d4020
In-Reply-To: <1477787923-61185-1-git-send-email-davidcc@google.com>
References: <1477787923-61185-1-git-send-email-davidcc@google.com>
Sender: linux-kernel-owner@vger.kernel.org
List-ID: 
X-Mailing-List: linux-kernel@vger.kernel.org

Try to activate pmonrs at a minimum rate of __cmt_min_progress_rate pmonrs
per second; this is the rotation logic's minimum progress SLO.

Signed-off-by: David Carrillo-Cisneros 
---
 arch/x86/events/intel/cmt.c | 274 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 273 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index 8bf6aa5..ba82f95 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -79,6 +79,14 @@ static u64 __cmt_min_mon_slice;
 static unsigned int __cmt_max_threshold;	/* bytes */
 
 /*
+ * Rotation SLO for all monr events (including those without llc_occupancy):
+ * @__cmt_min_progress_rate: Minimum number of pmonrs that must go to Active
+ * state per second; otherwise, the recycling occupancy error increases.
+ */
+#define CMT_DEFAULT_MIN_PROGRESS_RATE 2	/* pmonrs per sec */
+static unsigned int __cmt_min_progress_rate = CMT_DEFAULT_MIN_PROGRESS_RATE;
+
+/*
  * If @pkgd == NULL, return first online, pkg_data in cmt_pkgs_data.
  * Otherwise next online pkg_data or NULL if no more.
  */
@@ -466,6 +474,21 @@ static void pmonr_dep_dirty_to_active(struct pmonr *pmonr)
 	__pmonr_dep_to_active_helper(pmonr, rmids.read_rmid);
 }
 
+/* The dirty rmid must already be clean enough to go to free_rmids. */
+static void pmonr_dep_dirty_to_dep_idle_helper(struct pmonr *pmonr,
+					       union pmonr_rmids rmids)
+{
+	struct pkg_data *pkgd = pmonr->pkgd;
+
+	pmonr->pkgd->nr_dirty_rmids--;
+	__set_bit(rmids.read_rmid, pkgd->free_rmids);
+	list_move_tail(&pmonr->rot_entry, &pkgd->dep_idle_pmonrs);
+	pkgd->nr_dep_pmonrs++;
+
+	pmonr->state = PMONR_DEP_IDLE;
+	pmonr_set_rmids(pmonr, rmids.sched_rmid, INVALID_RMID);
+}
+
 static void monr_dealloc(struct monr *monr)
 {
 	u16 p, nr_pkgs = topology_max_packages();
@@ -1311,6 +1334,242 @@ static void smp_call_rmid_read(void *data)
 	atomic_set(&ccsd->on_read, 0);
 }
 
+/*
+ * Try to reuse dirty rmids for the Dep_Dirty pmonrs at the front of
+ * pkgd->dep_pmonrs.
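+ * A Dep_Dirty pmonr reuses its own former rmid, so reactivating it neither
+ * consumes a free rmid nor adds recycling error.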
+ */
+static int __try_activate_dep_dirty_pmonrs(struct pkg_data *pkgd)
+{
+	int reused = 0;
+	struct pmonr *pmonr;
+	struct list_head *lhead = &pkgd->dep_pmonrs;
+
+	lockdep_assert_held(&pkgd->lock);
+
+	while ((pmonr = list_first_entry_or_null(
+			lhead, struct pmonr, pkgd_deps_entry))) {
+		if (!pmonr || pmonr->state == PMONR_DEP_IDLE)
+			break;
+		pmonr_dep_dirty_to_active(pmonr);
+		reused++;
+	}
+
+	return reused;
+}
+
+static int try_activate_dep_dirty_pmonrs(struct pkg_data *pkgd)
+{
+	int nr_reused;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&pkgd->lock, flags);
+	nr_reused = __try_activate_dep_dirty_pmonrs(pkgd);
+	raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+
+	return nr_reused;
+}
+
+static inline int __try_use_free_rmid(struct pkg_data *pkgd, u32 rmid)
+{
+	struct pmonr *pmonr;
+
+	lockdep_assert_held(&pkgd->lock);
+
+	pmonr = list_first_entry_or_null(&pkgd->dep_idle_pmonrs,
+					 struct pmonr, rot_entry);
+	if (!pmonr)
+		return 0;
+	/* The state transition will move the rmid to the active list. */
+	pmonr_dep_idle_to_active(pmonr, rmid);
+
+	return 1 + __try_activate_dep_dirty_pmonrs(pkgd);
+}
+
+static int __try_use_free_rmids(struct pkg_data *pkgd)
+{
+	int nr_activated = 0, nr_used, r;
+
+	for_each_set_bit(r, pkgd->free_rmids, CMT_MAX_NR_RMIDS) {
+		/* Removes the rmid from the free list on success. */
+		nr_used = __try_use_free_rmid(pkgd, r);
+		if (!nr_used)
+			break;
+		nr_activated += nr_used;
+	}
+
+	return nr_activated;
+}
+
+static bool is_rmid_dirty(struct pkg_data *pkgd, u32 rmid, bool do_read,
+			  unsigned int dirty_thld, unsigned int *min_dirty)
+{
+	u64 val;
+
+	if (do_read && WARN_ON_ONCE(cmt_rmid_read(rmid, &val)))
+		return true;
+	if (val > dirty_thld) {
+		if (val < *min_dirty)
+			*min_dirty = val;
+		return true;
+	}
+
+	return false;
+}
+
+static int try_free_dep_dirty_pmonrs(struct pkg_data *pkgd,
+				     bool do_read,
+				     unsigned int dirty_thld,
+				     unsigned int *min_dirty)
+{
+	struct pmonr *pmonr, *tmp;
+	union pmonr_rmids rmids;
+	int nr_activated = 0;
+	unsigned long flags;
+
+	/*
+	 * No need to acquire the pkg lock for pkgd->dep_dirty_pmonrs because
+	 * the rotation logic is the only user of this list.
+	 */
+	list_for_each_entry_safe(pmonr, tmp,
+				 &pkgd->dep_dirty_pmonrs, rot_entry) {
+		rmids.value = atomic64_read(&pmonr->atomic_rmids);
+		if (is_rmid_dirty(pkgd, rmids.read_rmid,
+				  do_read, dirty_thld, min_dirty))
+			continue;
+
+		raw_spin_lock_irqsave(&pkgd->lock, flags);
+		pmonr_dep_dirty_to_dep_idle_helper(pmonr, rmids);
+		nr_activated += __try_use_free_rmid(pkgd, rmids.read_rmid);
+		raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+	}
+
+	return nr_activated;
+}
+
+static int try_free_dirty_rmids(struct pkg_data *pkgd,
+				bool do_read,
+				unsigned int dirty_thld,
+				unsigned int *min_dirty,
+				unsigned long *rmids_bm)
+{
+	int nr_activated = 0, r;
+	unsigned long flags;
+
+	/*
+	 * To avoid holding pkgd->lock while reading rmids from hardware
+	 * (slow), take the lock once to snapshot all the rmids that must be
+	 * read, then read them unlocked.
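+	 * An rmid is then moved from the dirty to the free bitmap only with
+	 * pkgd->lock held again below.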
+	 */
+	raw_spin_lock_irqsave(&pkgd->lock, flags);
+	memcpy(rmids_bm, pkgd->dirty_rmids, CMT_MAX_NR_RMIDS_BYTES);
+	raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+
+	for_each_set_bit(r, rmids_bm, CMT_MAX_NR_RMIDS) {
+		if (is_rmid_dirty(pkgd, r, do_read, dirty_thld, min_dirty))
+			continue;
+
+		raw_spin_lock_irqsave(&pkgd->lock, flags);
+
+		pkgd->nr_dirty_rmids--;
+		__clear_bit(r, pkgd->dirty_rmids);
+		__set_bit(r, pkgd->free_rmids);
+		nr_activated += __try_use_free_rmid(pkgd, r);
+
+		raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+	}
+
+	return nr_activated;
+}
+
+/**
+ * __intel_cmt_rmid_rotate - Rotate rmids among pmonrs and handle dirty rmids.
+ * @pkgd:		The package data to rotate rmids on.
+ * @active_goal:	Target minimum number of pmonrs to put in Active state.
+ * @max_dirty_thld:	Upper bound for dirty_thld, in CMT cache units.
+ *
+ * The goal for each iteration of the rotation logic is:
+ *   1) to activate at least @active_goal pmonrs.
+ *
+ * In order to activate Dep_{Dirty,Idle} pmonrs, the rotation logic:
+ *   1) activates eligible Dep_Dirty pmonrs: these pmonrs can reuse their
+ *   former rmid, even if it is not clean, without increasing the error.
+ *   2) takes clean rmids from Dep_Dirty pmonrs and reuses them for other
+ *   pmonrs or adds them to the pool of free rmids.
+ *   3) uses free rmids to activate Dep_Idle pmonrs.
+ *
+ * The rotation logic also checks the occupancy of dirty rmids and, once
+ * clean, reuses them or adds them to the pool of free rmids.
+ * When a Dep_Idle pmonr is activated, any Dep_Dirty pmonr that is immediately
+ * after it in the pkgd->dep_pmonrs list can be activated reusing its dirty
+ * rmid.
+ */
+static int __intel_cmt_rmid_rotate(struct pkg_data *pkgd,
+		unsigned int active_goal, unsigned int max_dirty_thld)
+{
+	unsigned int dirty_thld = 0, min_dirty, nr_activated;
+	unsigned int nr_dep_pmonrs;
+	unsigned long flags, *rmids_bm = NULL;
+	bool do_active_goal, read_dirty = true, dirty_is_max;
+
+	lockdep_assert_held(&pkgd->mutex);
+
+	rmids_bm = kzalloc(CMT_MAX_NR_RMIDS_BYTES, GFP_KERNEL);
+	if (!rmids_bm)
+		return -ENOMEM;
+
+	nr_activated = try_activate_dep_dirty_pmonrs(pkgd);
+
+again:
+	min_dirty = UINT_MAX;
+
+	/* Retry every iteration since dirty_thld may have changed. */
+	nr_activated += try_free_dirty_rmids(pkgd, read_dirty,
+					     dirty_thld, &min_dirty, rmids_bm);
+
+	raw_spin_lock_irqsave(&pkgd->lock, flags);
+	nr_activated += __try_use_free_rmids(pkgd);
+	raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+
+	nr_activated += try_free_dep_dirty_pmonrs(pkgd, read_dirty,
+						  dirty_thld, &min_dirty);
+
+	raw_spin_lock_irqsave(&pkgd->lock, flags);
+	nr_activated += __try_use_free_rmids(pkgd);
+	nr_dep_pmonrs = pkgd->nr_dep_pmonrs;
+	raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+
+	/*
+	 * If there is no room to increase dirty_thld, then no more dirty
+	 * rmids can be reused and the active goal must be given up.
+	 */
+	dirty_is_max = dirty_thld >= max_dirty_thld;
+	do_active_goal = nr_activated < active_goal && !dirty_is_max;
+
+	/*
+	 * Since Dep_Dirty pmonrs have their own dirty rmid, only Dep_Idle
+	 * pmonrs are waiting for an rmid to become available. Stop if no
+	 * pmonr is waiting for an rmid or there is no goal left to pursue.
+	 */
+	if (!nr_dep_pmonrs || !do_active_goal)
+		goto exit;
+
+	/*
+	 * Try to activate more pmonrs by increasing the dirty threshold.
+	 * Using the minimum observed occupancy in dirty rmids guarantees that
+	 * at least one rmid is recovered per iteration.
+	 */
+	if (do_active_goal) {
+		dirty_thld = min(min_dirty, max_dirty_thld);
+		/* dirty_thld changed, so occupancy is re-read in the next pass. */
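+		/*
+		 * Raising dirty_thld trades accuracy for progress: a recycled
+		 * rmid may still carry up to dirty_thld CMT cache units of
+		 * stale occupancy, which becomes recycling error for its new
+		 * user.
+		 */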
+		read_dirty = true;
+		goto again;
+	}
+
+exit:
+	kfree(rmids_bm);
+
+	return 0;
+}
+
 static struct pmu intel_cmt_pmu;
 
 /* Schedule rotation in one package. */
@@ -1360,10 +1619,20 @@ static bool intel_cmt_need_rmid_rotation(struct pkg_data *pkgd)
 
 /*
  * Rotation function, runs per-package.
+ * If rmids are needed in a package, it steals rmids from pmonrs that have
+ * been active longer than __cmt_pre_mon_slice + __cmt_min_mon_slice.
+ * The hardware doesn't provide a way to clear the occupancy of an rmid that
+ * will be reused. Therefore, before an rmid is reused, it should stay
+ * unscheduled for a while, in the hope that the cache lines counted towards
+ * it are eventually replaced and its occupancy decreases below
+ * __cmt_max_threshold.
  */
 static void intel_cmt_rmid_rotation_work(struct work_struct *work)
 {
 	struct pkg_data *pkgd;
 	/* Not the precise elapsed time, but good enough for rotation purposes. */
+	unsigned int elapsed_ms = intel_cmt_pmu.hrtimer_interval_ms;
+	unsigned int active_goal, max_dirty_threshold;
 
 	pkgd = container_of(to_delayed_work(work),
 			    struct pkg_data, rotation_work);
@@ -1377,7 +1646,10 @@ static void intel_cmt_rmid_rotation_work(struct work_struct *work)
 	if (!intel_cmt_need_rmid_rotation(pkgd))
 		goto exit;
 
-	/* To add call to rotation function in next patch */
+	active_goal = max(1u, (elapsed_ms * __cmt_min_progress_rate) / 1000);
+	max_dirty_threshold = READ_ONCE(__cmt_max_threshold) / cmt_l3_scale;
+
+	__intel_cmt_rmid_rotate(pkgd, active_goal, max_dirty_threshold);
 
 	if (intel_cmt_need_rmid_rotation(pkgd))
 		__intel_cmt_schedule_rotation_for_pkg(pkgd);
-- 
2.8.0.rc3.226.g39d4020
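
For reference, the pacing in intel_cmt_rmid_rotation_work() reduces to two
small formulas: the activation goal scales the progress-rate SLO by the
elapsed rotation period, and the dirty-threshold ceiling converts
__cmt_max_threshold from bytes into CMT cache units. Below is a minimal
standalone sketch of that arithmetic; it is not part of the patch, and the
*_DEMO constants are made-up example values, not values taken from the
driver.

/*
 * Standalone illustration only, not part of the patch. The *_DEMO values
 * are assumptions chosen for the example.
 */
#include <stdio.h>

#define CMT_DEFAULT_MIN_PROGRESS_RATE	2	/* pmonrs per sec (patch default) */
#define HRTIMER_INTERVAL_MS_DEMO	1000	/* assumed rotation period */
#define CMT_MAX_THRESHOLD_DEMO		4096	/* assumed __cmt_max_threshold, bytes */
#define CMT_L3_SCALE_DEMO		64	/* assumed bytes per CMT cache unit */

int main(void)
{
	unsigned int elapsed_ms = HRTIMER_INTERVAL_MS_DEMO;
	unsigned int active_goal, max_dirty_threshold;

	/* Same arithmetic as intel_cmt_rmid_rotation_work() in the patch. */
	active_goal = (elapsed_ms * CMT_DEFAULT_MIN_PROGRESS_RATE) / 1000;
	if (active_goal < 1)
		active_goal = 1;
	max_dirty_threshold = CMT_MAX_THRESHOLD_DEMO / CMT_L3_SCALE_DEMO;

	printf("activate >= %u pmonrs this pass; dirty_thld may grow up to %u CMT cache units\n",
	       active_goal, max_dirty_threshold);
	return 0;
}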