From: David Carrillo-Cisneros <davidcc@google.com>
To: linux-kernel@vger.kernel.org
Cc: "x86@kernel.org" <x86@kernel.org>, Ingo Molnar <mingo@redhat.com>,
Thomas Gleixner <tglx@linutronix.de>,
Andi Kleen <ak@linux.intel.com>, Kan Liang <kan.liang@intel.com>,
Peter Zijlstra <peterz@infradead.org>,
Vegard Nossum <vegard.nossum@gmail.com>,
Marcelo Tosatti <mtosatti@redhat.com>,
Nilay Vaish <nilayvaish@gmail.com>, Borislav Petkov <bp@suse.de>,
Vikas Shivappa <vikas.shivappa@linux.intel.com>,
Ravi V Shankar <ravi.v.shankar@intel.com>,
Fenghua Yu <fenghua.yu@intel.com>, Paul Turner <pjt@google.com>,
Stephane Eranian <eranian@google.com>,
David Carrillo-Cisneros <davidcc@google.com>
Subject: [PATCH v3 41/46] perf/x86/intel/cmt: add rotation minimum progress SLO
Date: Sat, 29 Oct 2016 17:38:38 -0700 [thread overview]
Message-ID: <1477787923-61185-42-git-send-email-davidcc@google.com> (raw)
In-Reply-To: <1477787923-61185-1-git-send-email-davidcc@google.com>
Try to activate pmonrs at a rate of at least __cmt_min_progress_rate pmonrs per second.
Signed-off-by: David Carrillo-Cisneros <davidcc@google.com>
---
arch/x86/events/intel/cmt.c | 274 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 273 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index 8bf6aa5..ba82f95 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -79,6 +79,14 @@ static u64 __cmt_min_mon_slice;
static unsigned int __cmt_max_threshold; /* bytes */
/*
+ * Rotation SLO of all monrs events (including those without llc_occupancy):
+ * @__cmt_min_progress_rate: Min number of pmonrs that must go to Active
+ * state per second; otherwise, the recycling occupancy error is increased.
+ */
+#define CMT_DEFAULT_MIN_PROGRESS_RATE 2 /* pmonrs per sec */
+static unsigned int __cmt_min_progress_rate = CMT_DEFAULT_MIN_PROGRESS_RATE;
+
+/*
* If @pkgd == NULL, return first online, pkg_data in cmt_pkgs_data.
* Otherwise next online pkg_data or NULL if no more.
*/
@@ -466,6 +474,21 @@ static void pmonr_dep_dirty_to_active(struct pmonr *pmonr)
__pmonr_dep_to_active_helper(pmonr, rmids.read_rmid);
}
+/*
+ * Move @pmonr from Dep_Dirty to Dep_Idle state.
+ *
+ * The read rmid carried in @rmids is handed over to the package's pool of
+ * free rmids, so it must already be clean enough to be reused.
+ * The caller visible in this patch (try_free_dep_dirty_pmonrs) invokes this
+ * helper with pkgd->lock held.
+ */
+static void pmonr_dep_dirty_to_dep_idle_helper(struct pmonr *pmonr,
+					       union pmonr_rmids rmids)
+{
+	struct pkg_data *pkgd = pmonr->pkgd;
+
+	/* The rmid is no longer accounted as dirty; it becomes free. */
+	pkgd->nr_dirty_rmids--;
+	__set_bit(rmids.read_rmid, pkgd->free_rmids);
+
+	/* Queue the pmonr behind the other pmonrs waiting in Dep_Idle. */
+	list_move_tail(&pmonr->rot_entry, &pkgd->dep_idle_pmonrs);
+	pkgd->nr_dep_pmonrs++;
+
+	/*
+	 * Publish the new state: the sched rmid is kept and INVALID_RMID is
+	 * passed in place of the rmid that was just released.
+	 */
+	pmonr->state = PMONR_DEP_IDLE;
+	pmonr_set_rmids(pmonr, rmids.sched_rmid, INVALID_RMID);
+}
+
static void monr_dealloc(struct monr *monr)
{
u16 p, nr_pkgs = topology_max_packages();
@@ -1311,6 +1334,242 @@ static void smp_call_rmid_read(void *data)
atomic_set(&ccsd->on_read, 0);
}
+/*
+ * Try to reuse dirty rmids for the pmonrs at the front of pkgd->dep_pmonrs.
+ *
+ * Pmonrs are taken from the head of the list and moved to Active state with
+ * pmonr_dep_dirty_to_active() until the list is empty or the pmonr at the
+ * head is in Dep_Idle state.
+ *
+ * NOTE(review): the loop only terminates if pmonr_dep_dirty_to_active()
+ * removes the pmonr from pkgd->dep_pmonrs; confirm against its definition.
+ *
+ * Must be called with pkgd->lock held.
+ *
+ * Returns the number of pmonrs that were activated.
+ */
+static int __try_activate_dep_dirty_pmonrs(struct pkg_data *pkgd)
+{
+	int reused = 0;
+	struct pmonr *pmonr;
+	struct list_head *lhead = &pkgd->dep_pmonrs;
+
+	lockdep_assert_held(&pkgd->lock);
+
+	/* The loop condition already guarantees that pmonr is not NULL. */
+	while ((pmonr = list_first_entry_or_null(
+				lhead, struct pmonr, pkgd_deps_entry))) {
+		if (pmonr->state == PMONR_DEP_IDLE)
+			break;
+		pmonr_dep_dirty_to_active(pmonr);
+		reused++;
+	}
+
+	return reused;
+}
+
+/*
+ * Locked wrapper around __try_activate_dep_dirty_pmonrs(): takes pkgd->lock
+ * with interrupts disabled for the duration of the call.
+ *
+ * Returns the number of pmonrs that were activated.
+ */
+static int try_activate_dep_dirty_pmonrs(struct pkg_data *pkgd)
+{
+	unsigned long irq_flags;
+	int reused;
+
+	raw_spin_lock_irqsave(&pkgd->lock, irq_flags);
+	reused = __try_activate_dep_dirty_pmonrs(pkgd);
+	raw_spin_unlock_irqrestore(&pkgd->lock, irq_flags);
+
+	return reused;
+}
+
+/*
+ * Give @rmid to the first pmonr waiting in pkgd->dep_idle_pmonrs, if any,
+ * and then activate the Dep_Dirty pmonrs that became eligible.
+ *
+ * Must be called with pkgd->lock held.
+ *
+ * Returns 0 if no pmonr is waiting in Dep_Idle (@rmid is left untouched),
+ * otherwise the total number of pmonrs activated by this call.
+ */
+static inline int __try_use_free_rmid(struct pkg_data *pkgd, u32 rmid)
+{
+	struct pmonr *idle;
+
+	lockdep_assert_held(&pkgd->lock);
+
+	idle = list_first_entry_or_null(&pkgd->dep_idle_pmonrs,
+					struct pmonr, rot_entry);
+	if (idle) {
+		/* The state transition will move the rmid to the active list. */
+		pmonr_dep_idle_to_active(idle, rmid);
+
+		return 1 + __try_activate_dep_dirty_pmonrs(pkgd);
+	}
+
+	return 0;
+}
+
+/*
+ * Walk the rmids set in pkgd->free_rmids and hand each one over to a waiting
+ * pmonr through __try_use_free_rmid(). The walk stops at the first rmid that
+ * cannot be used, i.e. when __try_use_free_rmid() reports no activation.
+ *
+ * Both callers in this patch hold pkgd->lock around this call.
+ *
+ * Returns the total number of pmonrs activated.
+ */
+static int __try_use_free_rmids(struct pkg_data *pkgd)
+{
+	int total = 0, rmid;
+
+	for_each_set_bit(rmid, pkgd->free_rmids, CMT_MAX_NR_RMIDS) {
+		/* Removes the rmid from free list if succeeds. */
+		int got = __try_use_free_rmid(pkgd, rmid);
+
+		if (got == 0)
+			break;
+		total += got;
+	}
+
+	return total;
+}
+
+/*
+ * Tell whether @rmid must still be considered dirty.
+ *
+ * @pkgd:	package data (not used by this function).
+ * @rmid:	rmid whose occupancy is checked.
+ * @do_read:	read the occupancy of @rmid with cmt_rmid_read().
+ * @dirty_thld:	occupancy above this value makes the rmid dirty.
+ * @min_dirty:	lowered to the occupancy of @rmid when the rmid is dirty and
+ *		its occupancy is smaller than the current *@min_dirty.
+ *
+ * A rmid is reported dirty when its occupancy cannot be obtained: either
+ * the read failed (a warning is emitted once) or @do_read is false. In the
+ * latter case there is no occupancy value to compare, so the rmid is
+ * conservatively kept dirty instead of testing an uninitialized value.
+ * NOTE(review): no caller in this patch passes @do_read == false; revisit
+ * if a cached occupancy becomes available.
+ *
+ * Returns true if the rmid is dirty, false if it is clean enough.
+ */
+static bool is_rmid_dirty(struct pkg_data *pkgd, u32 rmid, bool do_read,
+			  unsigned int dirty_thld, unsigned int *min_dirty)
+{
+	u64 val;
+
+	/* Without a read there is no occupancy to judge the rmid by. */
+	if (!do_read)
+		return true;
+
+	if (WARN_ON_ONCE(cmt_rmid_read(rmid, &val)))
+		return true;
+
+	if (val <= dirty_thld)
+		return false;
+
+	/* Track the smallest occupancy observed among dirty rmids. */
+	if (val < *min_dirty)
+		*min_dirty = val;
+
+	return true;
+}
+
+/*
+ * Take the rmid away from every Dep_Dirty pmonr whose rmid is now clean.
+ *
+ * Each pmonr in pkgd->dep_dirty_pmonrs whose read rmid is not dirty
+ * according to is_rmid_dirty() is moved to Dep_Idle state and its former
+ * rmid is immediately offered to the pmonrs waiting for a free rmid.
+ * @do_read, @dirty_thld and @min_dirty are passed on to is_rmid_dirty().
+ *
+ * Returns the number of pmonrs activated with the recovered rmids.
+ */
+static int try_free_dep_dirty_pmonrs(struct pkg_data *pkgd,
+				     bool do_read,
+				     unsigned int dirty_thld,
+				     unsigned int *min_dirty)
+{
+	struct pmonr *cur, *next;
+	union pmonr_rmids rmids;
+	unsigned long flags;
+	int activated = 0;
+
+	/*
+	 * The list is walked without pkgd->lock: rotation logic is the only
+	 * user of pkgd->dep_dirty_pmonrs, so no other context modifies it.
+	 *
+	 * NOTE(review): __try_use_free_rmid() may activate further Dep_Dirty
+	 * pmonrs; confirm that this cannot unlink the saved next entry from
+	 * this list while it is being walked.
+	 */
+	list_for_each_entry_safe(cur, next,
+				 &pkgd->dep_dirty_pmonrs, rot_entry) {
+		rmids.value = atomic64_read(&cur->atomic_rmids);
+
+		if (!is_rmid_dirty(pkgd, rmids.read_rmid,
+				   do_read, dirty_thld, min_dirty)) {
+			/* Clean rmid: release it and try to reuse it at once. */
+			raw_spin_lock_irqsave(&pkgd->lock, flags);
+			pmonr_dep_dirty_to_dep_idle_helper(cur, rmids);
+			activated += __try_use_free_rmid(pkgd,
+							 rmids.read_rmid);
+			raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+		}
+	}
+
+	return activated;
+}
+
+/*
+ * Move the dirty rmids that are now clean to the pool of free rmids and try
+ * to use them right away.
+ *
+ * @rmids_bm is scratch space provided by the caller; it receives a snapshot
+ * of pkgd->dirty_rmids (CMT_MAX_NR_RMIDS_BYTES bytes are copied into it).
+ * @do_read, @dirty_thld and @min_dirty are passed on to is_rmid_dirty().
+ *
+ * Returns the number of pmonrs activated with the recovered rmids.
+ */
+static int try_free_dirty_rmids(struct pkg_data *pkgd,
+				bool do_read,
+				unsigned int dirty_thld,
+				unsigned int *min_dirty,
+				unsigned long *rmids_bm)
+{
+	unsigned long flags;
+	int activated = 0, rmid;
+
+	/*
+	 * Reading rmids in hw is slow, so it is not done with pkgd->lock held.
+	 * Instead, snapshot the dirty rmids under the lock and read them after
+	 * the lock has been released.
+	 */
+	raw_spin_lock_irqsave(&pkgd->lock, flags);
+	memcpy(rmids_bm, pkgd->dirty_rmids, CMT_MAX_NR_RMIDS_BYTES);
+	raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+
+	for_each_set_bit(rmid, rmids_bm, CMT_MAX_NR_RMIDS) {
+		if (!is_rmid_dirty(pkgd, rmid, do_read,
+				   dirty_thld, min_dirty)) {
+			raw_spin_lock_irqsave(&pkgd->lock, flags);
+
+			/* dirty -> free, then offer the rmid to a waiter. */
+			pkgd->nr_dirty_rmids--;
+			__clear_bit(rmid, pkgd->dirty_rmids);
+			__set_bit(rmid, pkgd->free_rmids);
+			activated += __try_use_free_rmid(pkgd, rmid);
+
+			raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+		}
+	}
+
+	return activated;
+}
+
+/**
+ * __intel_cmt_rmid_rotate - Rotate rmids among pmonrs and handle dirty rmids.
+ * @pkgd:		The package data to rotate rmids on.
+ * @active_goal:	Target min nr of pmonrs to put in Active state.
+ * @max_dirty_thld:	Upper bound for dirty_thld, in CMT cache units.
+ *
+ * The goals for each iteration of rotation logic are:
+ *   1) to activate @active_goal pmonrs.
+ *
+ * In order to activate Dep_{Dirty,Idle} pmonrs, rotation logic:
+ *   1) activate eligible Dep_Dirty pmonrs: These pmonrs can reuse their former
+ *      rmid, even if it is not clean, without increasing the error.
+ *   2) take clean rmids from Dep_Dirty pmonrs and reuse them for other pmonrs
+ *      or add them to pool of free rmids.
+ *   3) use free rmids to activate Dep_Idle pmonrs.
+ *
+ * Rotation logic also checks the occupancy of dirty rmids and, if now clean,
+ * uses them or adds them to free rmids.
+ * When a Dep_Idle pmonr is activated, any Dep_Dirty pmonr that is immediately
+ * after it in the pkgd->dep_pmonrs list can be activated reusing its dirty
+ * rmid.
+ *
+ * The dirty threshold starts at zero and is raised, up to @max_dirty_thld,
+ * for as long as the activation goal has not been met and pmonrs remain in
+ * a Dep state.
+ *
+ * Must be called with pkgd->mutex held.
+ *
+ * Return: 0 on success, -ENOMEM if the scratch rmid bitmap cannot be
+ * allocated.
+ */
+static int __intel_cmt_rmid_rotate(struct pkg_data *pkgd,
+		unsigned int active_goal, unsigned int max_dirty_thld)
+{
+	unsigned int dirty_thld = 0, min_dirty, nr_activated;
+	unsigned int nr_dep_pmonrs;
+	unsigned long flags, *rmids_bm;
+	/*
+	 * Occupancy is read from hw on every pass: is_rmid_dirty() has no
+	 * other source for the occupancy value it compares.
+	 */
+	const bool read_dirty = true;
+
+	lockdep_assert_held(&pkgd->mutex);
+
+	rmids_bm = kzalloc(CMT_MAX_NR_RMIDS_BYTES, GFP_KERNEL);
+	if (!rmids_bm)
+		return -ENOMEM;
+
+	nr_activated = try_activate_dep_dirty_pmonrs(pkgd);
+
+	for (;;) {
+		min_dirty = UINT_MAX;
+
+		/* retry every iteration since dirty_thld may have changed. */
+		nr_activated += try_free_dirty_rmids(pkgd, read_dirty,
+						     dirty_thld, &min_dirty,
+						     rmids_bm);
+
+		raw_spin_lock_irqsave(&pkgd->lock, flags);
+		nr_activated += __try_use_free_rmids(pkgd);
+		raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+
+		nr_activated += try_free_dep_dirty_pmonrs(pkgd, read_dirty,
+							  dirty_thld,
+							  &min_dirty);
+
+		raw_spin_lock_irqsave(&pkgd->lock, flags);
+		nr_activated += __try_use_free_rmids(pkgd);
+		nr_dep_pmonrs = pkgd->nr_dep_pmonrs;
+		raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+
+		/*
+		 * Since Dep_Dirty pmonrs have their own dirty rmid, only
+		 * Dep_Idle pmonrs are waiting for a rmid to be available.
+		 * Stop if no pmonr waits for a rmid.
+		 */
+		if (!nr_dep_pmonrs)
+			break;
+
+		/* Stop once the activation goal has been reached. */
+		if (nr_activated >= active_goal)
+			break;
+
+		/*
+		 * If there is no room to increase dirty_thld, then no more
+		 * dirty rmids could be reused and must give up active goal.
+		 */
+		if (dirty_thld >= max_dirty_thld)
+			break;
+
+		/*
+		 * Try to activate more pmonrs by increasing the dirty
+		 * threshold. Using the minimum observed occupancy in dirty
+		 * rmids guarantees to recover at least one rmid per iteration.
+		 */
+		dirty_thld = min(min_dirty, max_dirty_thld);
+	}
+
+	kfree(rmids_bm);
+
+	return 0;
+}
+
static struct pmu intel_cmt_pmu;
/* Schedule rotation in one package. */
@@ -1360,10 +1619,20 @@ static bool intel_cmt_need_rmid_rotation(struct pkg_data *pkgd)
/*
* Rotation function, runs per-package.
+ * If rmids are needed in a package, it will steal rmids from pmonrs that have
+ * been active longer than __cmt_pre_mon_slice + __cmt_min_mon_slice.
+ * The hardware doesn't provide a way to free occupancy for a rmid that will
+ * be reused. Therefore, before reusing a rmid, it should stay unscheduled for
+ * a while, hoping that the cache lines counted towards this rmid will
+ * eventually be replaced and the rmid occupancy will decrease below
+ * __cmt_max_threshold.
*/
static void intel_cmt_rmid_rotation_work(struct work_struct *work)
{
struct pkg_data *pkgd;
+ /* not precise elapsed time, but good enough for rotation purposes. */
+ unsigned int elapsed_ms = intel_cmt_pmu.hrtimer_interval_ms;
+ unsigned int active_goal, max_dirty_threshold;
pkgd = container_of(to_delayed_work(work),
struct pkg_data, rotation_work);
@@ -1377,7 +1646,10 @@ static void intel_cmt_rmid_rotation_work(struct work_struct *work)
if (!intel_cmt_need_rmid_rotation(pkgd))
goto exit;
- /* To add call to rotation function in next patch */
+ active_goal = max(1u, (elapsed_ms * __cmt_min_progress_rate) / 1000);
+ max_dirty_threshold = READ_ONCE(__cmt_max_threshold) / cmt_l3_scale;
+
+ __intel_cmt_rmid_rotate(pkgd, active_goal, max_dirty_threshold);
if (intel_cmt_need_rmid_rotation(pkgd))
__intel_cmt_schedule_rotation_for_pkg(pkgd);
--
2.8.0.rc3.226.g39d4020
next prev parent reply other threads:[~2016-10-30 0:41 UTC|newest]
Thread overview: 59+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-10-30 0:37 [PATCH v3 00/46] Cache Monitoring Technology (aka CQM) David Carrillo-Cisneros
2016-10-30 0:37 ` [PATCH v3 01/46] perf/x86/intel/cqm: remove previous version of CQM and MBM David Carrillo-Cisneros
2016-10-30 0:37 ` [PATCH v3 02/46] perf/x86/intel: rename CQM cpufeatures to CMT David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 03/46] x86/intel: add CONFIG_INTEL_RDT_M configuration flag David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 04/46] perf/x86/intel/cmt: add device initialization and CPU hotplug support David Carrillo-Cisneros
2016-11-10 15:19 ` Thomas Gleixner
2016-10-30 0:38 ` [PATCH v3 05/46] perf/x86/intel/cmt: add per-package locks David Carrillo-Cisneros
2016-11-10 21:23 ` Thomas Gleixner
2016-11-11 2:22 ` David Carrillo-Cisneros
2016-11-11 7:21 ` Peter Zijlstra
2016-11-11 7:32 ` Ingo Molnar
2016-11-11 9:41 ` Thomas Gleixner
2016-11-11 17:21 ` David Carrillo-Cisneros
2016-11-13 10:58 ` Thomas Gleixner
2016-11-15 4:53 ` David Carrillo-Cisneros
2016-11-16 19:00 ` Thomas Gleixner
2016-10-30 0:38 ` [PATCH v3 06/46] perf/x86/intel/cmt: add intel_cmt pmu David Carrillo-Cisneros
2016-11-10 21:27 ` Thomas Gleixner
2016-10-30 0:38 ` [PATCH v3 07/46] perf/core: add RDT Monitoring attributes to struct hw_perf_event David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 08/46] perf/x86/intel/cmt: add MONitored Resource (monr) initialization David Carrillo-Cisneros
2016-11-10 23:09 ` Thomas Gleixner
2016-10-30 0:38 ` [PATCH v3 09/46] perf/x86/intel/cmt: add basic monr hierarchy David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 10/46] perf/x86/intel/cmt: add Package MONitored Resource (pmonr) initialization David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 11/46] perf/x86/intel/cmt: add cmt_user_flags (uflags) to monr David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 12/46] perf/x86/intel/cmt: add per-package rmid pools David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 13/46] perf/x86/intel/cmt: add pmonr's Off and Unused states David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 14/46] perf/x86/intel/cmt: add Active and Dep_{Idle, Dirty} states David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 15/46] perf/x86/intel: encapsulate rmid and closid updates in pqr cache David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 16/46] perf/x86/intel/cmt: set sched rmid and complete pmu start/stop/add/del David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 17/46] perf/x86/intel/cmt: add uflag CMT_UF_NOLAZY_RMID David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 18/46] perf/core: add arch_info field to struct perf_cgroup David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 19/46] perf/x86/intel/cmt: add support for cgroup events David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 20/46] perf/core: add pmu::event_terminate David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 21/46] perf/x86/intel/cmt: use newly introduced event_terminate David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 22/46] perf/x86/intel/cmt: sync cgroups and intel_cmt device start/stop David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 23/46] perf/core: hooks to add architecture specific features in perf_cgroup David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 24/46] perf/x86/intel/cmt: add perf_cgroup_arch_css_{online,offline} David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 25/46] perf/x86/intel/cmt: add monr->flags and CMT_MONR_ZOMBIE David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 26/46] sched: introduce the finish_arch_pre_lock_switch() scheduler hook David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 27/46] perf/x86/intel: add pqr cache flags and intel_pqr_ctx_switch David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 28/46] perf,perf/x86,perf/powerpc,perf/arm,perf/*: add int error return to pmu::read David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 29/46] perf/x86/intel/cmt: add error handling to intel_cmt_event_read David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 30/46] perf/x86/intel/cmt: add asynchronous read for task events David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 31/46] perf/x86/intel/cmt: add subtree read for cgroup events David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 32/46] perf/core: Add PERF_EV_CAP_READ_ANY_{CPU_,}PKG flags David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 33/46] perf/x86/intel/cmt: use PERF_EV_CAP_READ_{,CPU_}PKG flags in Intel cmt David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 34/46] perf/core: introduce PERF_EV_CAP_CGROUP_NO_RECURSION David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 35/46] perf/x86/intel/cmt: use PERF_EV_CAP_CGROUP_NO_RECURSION in intel_cmt David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 36/46] perf/core: add perf_event cgroup hooks for subsystem attributes David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 37/46] perf/x86/intel/cmt: add cont_monitoring to perf cgroup David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 38/46] perf/x86/intel/cmt: introduce read SLOs for rotation David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 39/46] perf/x86/intel/cmt: add max_recycle_threshold sysfs attribute David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 40/46] perf/x86/intel/cmt: add rotation scheduled work David Carrillo-Cisneros
2016-10-30 0:38 ` David Carrillo-Cisneros [this message]
2016-10-30 0:38 ` [PATCH v3 42/46] perf/x86/intel/cmt: add rmid stealing David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 43/46] perf/x86/intel/cmt: add CMT_UF_NOSTEAL_RMID flag David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 44/46] perf/x86/intel/cmt: add debugfs intel_cmt directory David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 45/46] perf/stat: fix bug in handling events in error state David Carrillo-Cisneros
2016-10-30 0:38 ` [PATCH v3 46/46] perf/stat: revamp read error handling, snapshot and per_pkg events David Carrillo-Cisneros
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1477787923-61185-42-git-send-email-davidcc@google.com \
--to=davidcc@google.com \
--cc=ak@linux.intel.com \
--cc=bp@suse.de \
--cc=eranian@google.com \
--cc=fenghua.yu@intel.com \
--cc=kan.liang@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=mtosatti@redhat.com \
--cc=nilayvaish@gmail.com \
--cc=peterz@infradead.org \
--cc=pjt@google.com \
--cc=ravi.v.shankar@intel.com \
--cc=tglx@linutronix.de \
--cc=vegard.nossum@gmail.com \
--cc=vikas.shivappa@linux.intel.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).