All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Newman <peternewman@google.com>
To: Fenghua Yu <fenghua.yu@intel.com>,
	Reinette Chatre <reinette.chatre@intel.com>
Cc: Babu Moger <babu.moger@amd.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>,
	Stephane Eranian <eranian@google.com>,
	James Morse <james.morse@arm.com>,
	linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org,
	Peter Newman <peternewman@google.com>
Subject: [PATCH v1 3/9] x86/resctrl: Add resctrl_mbm_flush_cpu() to collect CPUs' MBM events
Date: Fri, 21 Apr 2023 16:17:17 +0200	[thread overview]
Message-ID: <20230421141723.2405942-4-peternewman@google.com> (raw)
In-Reply-To: <20230421141723.2405942-1-peternewman@google.com>

AMD implementations so far are only guaranteed to provide MBM event
counts for RMIDs which are currently assigned in CPUs' PQR_ASSOC MSRs.
Hardware can reallocate the counter resources of all other RMIDs, which
are not currently assigned, to those which are, zeroing the event counts
of the unassigned RMIDs.

In practice, this makes it impossible to simultaneously calculate the
memory bandwidth speed of all RMIDs on a busy system where all RMIDs are
in use. Over a multiple-second measurement window, the RMID would need
to remain assigned in all of the L3 cache domains where it has been
assigned for the duration of the measurement, otherwise portions of the
final count will be zero. In general, it is not possible to bound the
number of RMIDs which will be assigned in an L3 domain over any interval
of time.

To provide reliable MBM counts on such systems, introduce "soft" RMIDs:
when enabled, each CPU is permanently assigned a hardware RMID whose
event counts are flushed to the current soft RMID during context
switches which result in a change in soft RMID as well as whenever
userspace requests the current event count for a domain.

Implement resctrl_mbm_flush_cpu(), which collects a domain's current MBM
event counts into its current software RMID. The delta for each CPU is
determined by tracking the previous event counts in per-CPU data.  The
software byte counts reside in the arch-independent mbm_state
structures.

Co-developed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Newman <peternewman@google.com>
---
 arch/x86/include/asm/resctrl.h         |  2 +
 arch/x86/kernel/cpu/resctrl/internal.h | 10 ++--
 arch/x86/kernel/cpu/resctrl/monitor.c  | 78 ++++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 255a78d9d906..e7acf118d770 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -13,6 +13,7 @@
  * @cur_closid:	The cached Class Of Service ID
  * @default_rmid:	The user assigned Resource Monitoring ID
  * @default_closid:	The user assigned cached Class Of Service ID
+ * @hw_rmid:	The permanently-assigned RMID when soft RMIDs are in use
  *
  * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
  * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
@@ -27,6 +28,7 @@ struct resctrl_pqr_state {
 	u32			cur_closid;
 	u32			default_rmid;
 	u32			default_closid;
+	u32			hw_rmid;
 };
 
 DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state);
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 02a062558c67..256eee05d447 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -298,12 +298,14 @@ struct rftype {
  * @prev_bw:	The most recent bandwidth in MBps
  * @delta_bw:	Difference between the current and previous bandwidth
  * @delta_comp:	Indicates whether to compute the delta_bw
+ * @soft_rmid_bytes: Recent bandwidth count in bytes when using soft RMIDs
  */
 struct mbm_state {
-	u64	prev_bw_bytes;
-	u32	prev_bw;
-	u32	delta_bw;
-	bool	delta_comp;
+	u64		prev_bw_bytes;
+	u32		prev_bw;
+	u32		delta_bw;
+	bool		delta_comp;
+	atomic64_t	soft_rmid_bytes;
 };
 
 /**
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 2de8397f91cd..3671100d3cc7 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -404,6 +404,84 @@ static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 rmid,
 	}
 }
 
+struct mbm_soft_counter {
+	u64	prev_bytes;
+	bool	initialized;
+};
+
+struct mbm_flush_state {
+	struct mbm_soft_counter local;
+	struct mbm_soft_counter total;
+};
+
+DEFINE_PER_CPU(struct mbm_flush_state, flush_state);
+
+/*
+ * Flush the delta of this CPU's hw_rmid event count into its current soft RMID.
+ */
+static void __mbm_flush(int evtid, struct rdt_resource *r, struct rdt_domain *d)
+{
+	struct mbm_flush_state *state = this_cpu_ptr(&flush_state);
+	u32 soft_rmid = this_cpu_ptr(&pqr_state)->cur_rmid;
+	u32 hw_rmid = this_cpu_ptr(&pqr_state)->hw_rmid;
+	struct mbm_soft_counter *counter;
+	struct mbm_state *m;
+	u64 val;
+
+	/* cache occupancy events are disabled in this mode */
+	WARN_ON(!is_mbm_event(evtid));
+
+	if (evtid == QOS_L3_MBM_LOCAL_EVENT_ID) {
+		counter = &state->local;
+	} else {
+		WARN_ON(evtid != QOS_L3_MBM_TOTAL_EVENT_ID);
+		counter = &state->total;
+	}
+
+	/*
+	 * Propagate the value read from the hw_rmid assigned to the current CPU
+	 * into the "soft" rmid associated with the current task or CPU.
+	 */
+	m = get_mbm_state(d, soft_rmid, evtid);
+	if (!m)
+		return;
+
+	if (resctrl_arch_rmid_read(r, d, hw_rmid, evtid, &val))
+		return;
+
+	/* Count bandwidth after the first successful counter read. */
+	if (counter->initialized) {
+		/* Assume that mbm_update() will prevent double-overflows. */
+		if (val != counter->prev_bytes)
+			atomic64_add(val - counter->prev_bytes,
+				     &m->soft_rmid_bytes);
+	} else {
+		counter->initialized = true;
+	}
+
+	counter->prev_bytes = val;
+}
+
+/*
+ * Called from context switch code __resctrl_sched_in() when the current soft
+ * RMID is changing or before reporting event counts to user space.
+ */
+void resctrl_mbm_flush_cpu(void)
+{
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	int cpu = smp_processor_id();
+	struct rdt_domain *d;
+
+	d = get_domain_from_cpu(cpu, r);
+	if (!d)
+		return;
+
+	if (is_mbm_local_enabled())
+		__mbm_flush(QOS_L3_MBM_LOCAL_EVENT_ID, r, d);
+	if (is_mbm_total_enabled())
+		__mbm_flush(QOS_L3_MBM_TOTAL_EVENT_ID, r, d);
+}
+
 static int __mon_event_count(u32 rmid, struct rmid_read *rr)
 {
 	struct mbm_state *m;
-- 
2.40.0.634.g4ca3ef3211-goog


  parent reply	other threads:[~2023-04-21 14:18 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-21 14:17 [PATCH v1 0/9] x86/resctrl: Use soft RMIDs for reliable MBM on AMD Peter Newman
2023-04-21 14:17 ` [PATCH v1 1/9] selftests/resctrl: Verify all RMIDs count together Peter Newman
2023-04-21 14:17 ` [PATCH v1 2/9] x86/resctrl: Hold a spinlock in __rmid_read() on AMD Peter Newman
2023-05-11 21:35   ` Reinette Chatre
2023-05-12 13:23     ` Peter Newman
2023-05-12 15:23       ` Reinette Chatre
2023-04-21 14:17 ` Peter Newman [this message]
2023-05-11 21:37   ` [PATCH v1 3/9] x86/resctrl: Add resctrl_mbm_flush_cpu() to collect CPUs' MBM events Reinette Chatre
2023-05-12 13:25     ` Peter Newman
2023-05-12 15:26       ` Reinette Chatre
2023-05-15 14:42         ` Peter Newman
2023-05-17  0:05           ` Reinette Chatre
2023-12-01 20:56             ` Peter Newman
2023-12-05 21:57               ` Reinette Chatre
2023-12-06  0:33                 ` Peter Newman
2023-12-06  1:46                   ` Reinette Chatre
2023-12-06 18:38                     ` Peter Newman
2023-12-06 20:02                       ` Reinette Chatre
2023-05-16 14:18       ` Peter Newman
2023-05-16 14:27         ` Peter Newman
2023-06-01 14:45     ` Peter Newman
2023-06-01 17:14       ` Reinette Chatre
2023-04-21 14:17 ` [PATCH v1 4/9] x86/resctrl: Flush MBM event counts on soft RMID change Peter Newman
2023-05-11 21:37   ` Reinette Chatre
2023-04-21 14:17 ` [PATCH v1 5/9] x86/resctrl: Call mon_event_count() directly for soft RMIDs Peter Newman
2023-05-11 21:38   ` Reinette Chatre
2023-04-21 14:17 ` [PATCH v1 6/9] x86/resctrl: Create soft RMID version of __mon_event_count() Peter Newman
2023-05-11 21:38   ` Reinette Chatre
2023-04-21 14:17 ` [PATCH v1 7/9] x86/resctrl: Assign HW RMIDs to CPUs for soft RMID Peter Newman
2023-05-11 21:39   ` Reinette Chatre
2023-05-16 14:49     ` Peter Newman
2023-05-17  0:06       ` Reinette Chatre
2023-06-06 13:31         ` Peter Newman
2023-06-06 13:36   ` Peter Newman
2023-04-21 14:17 ` [PATCH v1 8/9] x86/resctrl: Use mbm_update() to push soft RMID counts Peter Newman
2023-05-11 21:40   ` Reinette Chatre
2023-06-02 12:42     ` Peter Newman
2023-06-06 13:48   ` Peter Newman
2023-04-21 14:17 ` [PATCH v1 9/9] x86/resctrl: Add mount option to enable soft RMID Peter Newman
2023-05-11 21:41   ` Reinette Chatre

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230421141723.2405942-4-peternewman@google.com \
    --to=peternewman@google.com \
    --cc=babu.moger@amd.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=eranian@google.com \
    --cc=fenghua.yu@intel.com \
    --cc=hpa@zytor.com \
    --cc=james.morse@arm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=reinette.chatre@intel.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.