linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Reinette Chatre <reinette.chatre@intel.com>
To: tglx@linutronix.de, fenghua.yu@intel.com, tony.luck@intel.com,
	vikas.shivappa@linux.intel.com
Cc: gavin.hindman@intel.com, jithu.joseph@intel.com,
	dave.hansen@intel.com, mingo@redhat.com, hpa@zytor.com,
	x86@kernel.org, linux-kernel@vger.kernel.org,
	Reinette Chatre <reinette.chatre@intel.com>
Subject: [PATCH V4 34/38] x86/intel_rdt: Create debugfs files for pseudo-locking testing
Date: Tue, 22 May 2018 04:29:22 -0700	[thread overview]
Message-ID: <2da8730575c589eb7303c7b18a2721da40c446e2.1526987654.git.reinette.chatre@intel.com> (raw)
In-Reply-To: <cover.1526987654.git.reinette.chatre@intel.com>
In-Reply-To: <cover.1526987654.git.reinette.chatre@intel.com>

There is no simple yes/no test to determine if pseudo-locking was
successful. In order to test pseudo-locking we expose a debugfs file for
each pseudo-locked region that will record the latency of reading the
pseudo-locked memory at a stride of 32 bytes (hardcoded). These numbers
will give us an idea of whether locking was successful since they will
reflect cache hits and cache misses (hardware prefetching is disabled
during the test).

The new debugfs file "pseudo_lock_measure" will, when the
pseudo_lock_mem_latency tracepoint is enabled, record the latency of
accessing each cache line twice.

Kernel tracepoints offer us histograms, which are a simple way to visualize
the memory access latency and immediately see any cache misses. For
example, setting the hist trigger below before triggering the measurement
will display the memory access latencies and the number of instances at
each latency:
echo 'hist:keys=latency' > /sys/kernel/debug/tracing/events/resctrl/\
                           pseudo_lock_mem_latency/trigger
echo 1 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/enable
echo 1 > /sys/kernel/debug/resctrl/<newlock>/pseudo_lock_measure
echo 0 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/enable
cat /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/hist

Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
---
 arch/x86/Kconfig                                  |   1 +
 arch/x86/kernel/cpu/Makefile                      |   1 +
 arch/x86/kernel/cpu/intel_rdt.h                   |   5 +
 arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c       | 200 +++++++++++++++++++++-
 arch/x86/kernel/cpu/intel_rdt_pseudo_lock_event.h |  23 +++
 5 files changed, 229 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/cpu/intel_rdt_pseudo_lock_event.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4fa24d0cce5a..5c872580716e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -451,6 +451,7 @@ config INTEL_RDT
 config INTEL_RDT_DEBUGFS
 	bool "Intel RDT debugfs interface"
 	depends on INTEL_RDT
+	select HIST_TRIGGERS
 	select DEBUG_FS
 	help
 	  Enable the creation of Intel RDT debugfs files. In support of
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 071f50162727..88b87fb0d8e0 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 
 obj-$(CONFIG_INTEL_RDT)	+= intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_monitor.o
 obj-$(CONFIG_INTEL_RDT)	+= intel_rdt_ctrlmondata.o intel_rdt_pseudo_lock.o
+CFLAGS_intel_rdt_pseudo_lock.o = -I$(src)
 
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index c4ff638e3bc6..c8712446f185 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -138,6 +138,8 @@ struct mongroup {
  * @line_size:		size of the cache lines
  * @size:		size of pseudo-locked region in bytes
  * @kmem:		the kernel memory associated with pseudo-locked region
+ * @debugfs_dir:	pointer to this region's directory in the debugfs
+ *			filesystem
  */
 struct pseudo_lock_region {
 	struct rdt_resource	*r;
@@ -149,6 +151,9 @@ struct pseudo_lock_region {
 	unsigned int		line_size;
 	unsigned int		size;
 	void			*kmem;
+#ifdef CONFIG_INTEL_RDT_DEBUGFS
+	struct dentry		*debugfs_dir;
+#endif
 };
 
 /**
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
index bced04dd90b6..0a6785f1a67b 100644
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
@@ -14,6 +14,7 @@
 #include <linux/cacheinfo.h>
 #include <linux/cpu.h>
 #include <linux/cpumask.h>
+#include <linux/debugfs.h>
 #include <linux/kthread.h>
 #include <linux/slab.h>
 #include <asm/cacheflush.h>
@@ -21,6 +22,11 @@
 #include <asm/intel_rdt_sched.h>
 #include "intel_rdt.h"
 
+#ifdef CONFIG_INTEL_RDT_DEBUGFS
+#define CREATE_TRACE_POINTS
+#include "intel_rdt_pseudo_lock_event.h"
+#endif
+
 /*
  * MSR_MISC_FEATURE_CONTROL register enables the modification of hardware
  * prefetcher state. Details about this register can be found in the MSR
@@ -174,6 +180,9 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
 		plr->d->plr = NULL;
 	plr->d = NULL;
 	plr->cbm = 0;
+#ifdef CONFIG_INTEL_RDT_DEBUGFS
+	plr->debugfs_dir = NULL;
+#endif
 }
 
 /**
@@ -672,6 +681,163 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
 	return false;
 }
 
+#ifdef CONFIG_INTEL_RDT_DEBUGFS
+/**
+ * measure_cycles_lat_fn - Measure cycle latency to read pseudo-locked memory
+ * @_plr: pseudo-lock region to measure (a struct pseudo_lock_region *)
+ *
+ * There is no deterministic way to test whether a memory region is cached.
+ * Instead, time each read: the access latency shows how close to the CPU
+ * the data was. With the hardware prefetcher disabled, the region is read
+ * at a fixed 32 byte stride and the TSC delta around every read is emitted
+ * through the pseudo_lock_mem_latency tracepoint. When the stride is half
+ * a cache line, a miss is easy to spot: the read of the first half is
+ * significantly slower than the read of the second half.
+ *
+ * Return: 0. Sets plr->thread_done and wakes waiters on plr->lock_thread_wq.
+ */
+static int measure_cycles_lat_fn(void *_plr)
+{
+	struct pseudo_lock_region *plr = _plr;
+	u64 start, end;
+	u64 i;
+#ifdef CONFIG_KASAN
+	/*
+	 * The registers used for local register variables are also used
+	 * when KASAN is active. When KASAN is active we use a regular
+	 * variable to ensure we always use a valid pointer to access memory.
+	 * The cost is that accessing this pointer, which could be in
+	 * cache, will be included in the measurement of memory read latency.
+	 */
+	void *mem_r;
+#else
+#ifdef CONFIG_X86_64
+	register void *mem_r asm("rbx");
+#else
+	register void *mem_r asm("ebx");
+#endif /* CONFIG_X86_64 */
+#endif /* CONFIG_KASAN */
+
+	local_irq_disable();
+	/*
+	 * Disable the hardware prefetchers. The wrmsr may be reordered with
+	 * the assignment below it; __wrmsr() is called directly so that
+	 * tracing cannot clobber the register variable holding the pointer.
+	 */
+	__wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+	mem_r = plr->kmem;
+	/*
+	 * Dummy read of the TSC to pull the timing instructions into the L1
+	 * instruction cache; this value is overwritten inside the loop.
+	 */
+	start = rdtsc_ordered();
+	for (i = 0; i < plr->size; i += 32) {
+		start = rdtsc_ordered();
+		asm volatile("mov (%0,%1,1), %%eax\n\t"
+			     :
+			     : "r" (mem_r), "r" (i)
+			     : "%eax", "memory");
+		end = rdtsc_ordered();
+		trace_pseudo_lock_mem_latency((u32)(end - start));
+	}
+	wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
+	local_irq_enable();
+	plr->thread_done = 1;
+	wake_up_interruptible(&plr->lock_thread_wq);
+	return 0;
+}
+
+/**
+ * pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region
+ * @rdtgrp: resource group whose pseudo-locked region (->plr) is measured
+ *
+ * The latency must be measured from a CPU associated with the region, so
+ * the first CPU in the region's domain cpu_mask is chosen, a kthread bound
+ * to it runs measure_cycles_lat_fn(), and the caller waits for completion.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp)
+{
+	struct pseudo_lock_region *plr = rdtgrp->plr;
+	struct task_struct *thread;
+	unsigned int cpu;
+	int ret;
+
+	cpus_read_lock();
+	mutex_lock(&rdtgroup_mutex);
+
+	if (rdtgrp->flags & RDT_DELETED) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	plr->thread_done = 0;
+	cpu = cpumask_first(&plr->d->cpu_mask);
+	if (!cpu_online(cpu)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
+					cpu_to_node(cpu),
+					"pseudo_lock_measure/%u", cpu);
+	if (IS_ERR(thread)) {
+		ret = PTR_ERR(thread);
+		goto out;
+	}
+	kthread_bind(thread, cpu);
+	wake_up_process(thread);
+
+	ret = wait_event_interruptible(plr->lock_thread_wq,
+				       plr->thread_done == 1);
+	if (ret < 0)
+		goto out;
+
+	ret = 0;
+
+out:
+	mutex_unlock(&rdtgroup_mutex);
+	cpus_read_unlock();
+	return ret;
+}
+
+/* debugfs write handler: a true boolean input starts one measurement. */
+static ssize_t pseudo_lock_measure_trigger(struct file *file,
+					   const char __user *user_buf,
+					   size_t count, loff_t *ppos)
+{
+	struct rdtgroup *rdtgrp = file->private_data;
+	size_t buf_size;
+	char buf[32];
+	int ret;
+	bool bv;
+
+	buf_size = min(count, (sizeof(buf) - 1));
+	if (copy_from_user(buf, user_buf, buf_size))
+		return -EFAULT;
+
+	buf[buf_size] = '\0';
+	ret = strtobool(buf, &bv);
+	/* False is a no-op; report it consumed so writers don't retry. */
+	if (ret || !bv)
+		return ret ? ret : (ssize_t)count;
+
+	ret = debugfs_file_get(file->f_path.dentry);
+	if (unlikely(ret))
+		return ret;
+	ret = pseudo_lock_measure_cycles(rdtgrp);
+	debugfs_file_put(file->f_path.dentry);
+	return ret ? ret : (ssize_t)count;
+}
+
+static const struct file_operations pseudo_measure_fops = {
+	.write = pseudo_lock_measure_trigger, /* true input runs a measurement */
+	.open = simple_open, /* write handler relies on file->private_data */
+	.llseek = default_llseek,
+};
+#endif /* CONFIG_INTEL_RDT_DEBUGFS */
+
 /**
  * rdtgroup_pseudo_lock_create - Create a pseudo-locked region
  * @rdtgrp: resource group to which pseudo-lock region belongs
@@ -692,6 +858,9 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
 {
 	struct pseudo_lock_region *plr = rdtgrp->plr;
 	struct task_struct *thread;
+#ifdef CONFIG_INTEL_RDT_DEBUGFS
+	struct dentry *entry;
+#endif
 	int ret;
 
 	ret = pseudo_lock_region_alloc(plr);
@@ -727,11 +896,33 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
 		goto out_region;
 	}
 
+#ifdef CONFIG_INTEL_RDT_DEBUGFS
+	plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name,
+					      debugfs_resctrl);
+	if (IS_ERR(plr->debugfs_dir)) {
+		ret = PTR_ERR(plr->debugfs_dir);
+		plr->debugfs_dir = NULL;
+		goto out_region;
+	}
+
+	entry = debugfs_create_file("pseudo_lock_measure", 0200,
+				    plr->debugfs_dir, rdtgrp,
+				    &pseudo_measure_fops);
+	if (IS_ERR(entry)) {
+		ret = PTR_ERR(entry);
+		goto out_debugfs;
+	}
+#endif
+
 	rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED;
 	closid_free(rdtgrp->closid);
 	ret = 0;
 	goto out;
 
+#ifdef CONFIG_INTEL_RDT_DEBUGFS
+out_debugfs:
+	debugfs_remove_recursive(plr->debugfs_dir);
+#endif
 out_region:
 	pseudo_lock_region_clear(plr);
 out:
@@ -754,12 +945,19 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
  */
 void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
 {
-	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP)
+	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
 		/*
 		 * Default group cannot be a pseudo-locked region so we can
 		 * free closid here.
 		 */
 		closid_free(rdtgrp->closid);
+		goto free;
+	}
+
+#ifdef CONFIG_INTEL_RDT_DEBUGFS
+	debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
+#endif
 
+free:
 	pseudo_lock_free(rdtgrp);
 }
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock_event.h b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock_event.h
new file mode 100644
index 000000000000..3cd0fa27d5fe
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock_event.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM resctrl
+
+#if !defined(_TRACE_PSEUDO_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PSEUDO_LOCK_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(pseudo_lock_mem_latency, /* one event per timed memory read */
+	    TP_PROTO(u32 latency), /* TSC delta measured around the read */
+	    TP_ARGS(latency),
+	    TP_STRUCT__entry(__field(u32, latency)),
+	    TP_fast_assign(__entry->latency = latency),
+	    TP_printk("latency=%u", __entry->latency)
+	   );
+
+#endif /* _TRACE_PSEUDO_LOCK_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE intel_rdt_pseudo_lock_event
+#include <trace/define_trace.h>
-- 
2.13.6

  parent reply	other threads:[~2018-05-22 19:34 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-22 11:28 [PATCH V4 00/38] Intel(R) Resource Director Technology Cache Pseudo-Locking enabling Reinette Chatre
2018-05-22 11:28 ` [PATCH V4 01/38] x86/intel_rdt: Document new mode, size, and bit_usage Reinette Chatre
2018-05-22 11:28 ` [PATCH V4 02/38] x86/intel_rdt: Introduce RDT resource group mode Reinette Chatre
2018-05-22 11:28 ` [PATCH V4 03/38] x86/intel_rdt: Associate mode with each RDT resource group Reinette Chatre
2018-05-22 11:28 ` [PATCH V4 04/38] x86/intel_rdt: Introduce resource group's mode resctrl file Reinette Chatre
2018-05-22 11:28 ` [PATCH V4 05/38] x86/intel_rdt: Introduce test to determine if closid is in use Reinette Chatre
2018-05-22 11:28 ` [PATCH V4 06/38] x86/intel_rdt: Make useful functions available internally Reinette Chatre
2018-05-22 11:28 ` [PATCH V4 07/38] x86/intel_rdt: Initialize new resource group with sane defaults Reinette Chatre
2018-05-22 11:28 ` [PATCH V4 08/38] x86/intel_rdt: Introduce new "exclusive" mode Reinette Chatre
2018-05-22 11:28 ` [PATCH V4 09/38] x86/intel_rdt: Enable setting of exclusive mode Reinette Chatre
2018-05-22 11:28 ` [PATCH V4 10/38] x86/intel_rdt: Making CBM name and type more explicit Reinette Chatre
2018-05-22 11:28 ` [PATCH V4 11/38] x86/intel_rdt: Support flexible data to parsing callbacks Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 12/38] x86/intel_rdt: Ensure requested schemata respects mode Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 13/38] x86/intel_rdt: Introduce "bit_usage" to display cache allocations details Reinette Chatre
2018-05-22 21:03   ` Randy Dunlap
2018-05-22 21:09     ` Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 14/38] x86/intel_rdt: Display resource groups' allocations' size in bytes Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 15/38] x86/intel_rdt: Documentation for Cache Pseudo-Locking Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 16/38] x86/intel_rdt: Introduce the Cache Pseudo-Locking modes Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 17/38] x86/intel_rdt: Respect read and write access Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 18/38] x86/intel_rdt: Add utility to test if tasks assigned to resource group Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 19/38] x86/intel_rdt: Add utility to restrict/restore access to resctrl files Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 20/38] x86/intel_rdt: Protect against resource group changes during locking Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 21/38] x86/intel_rdt: Utilities to restrict/restore access to specific files Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 22/38] x86/intel_rdt: Add check to determine if monitoring in progress Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 23/38] x86/intel_rdt: Introduce pseudo-locked region Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 24/38] x86/intel_rdt: Support enter/exit of locksetup mode Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 25/38] x86/intel_rdt: Enable entering of pseudo-locksetup mode Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 26/38] x86/intel_rdt: Split resource group removal in two Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 27/38] x86/intel_rdt: Add utilities to test pseudo-locked region possibility Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 28/38] x86/intel_rdt: Discover supported platforms via prefetch disable bits Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 29/38] x86/intel_rdt: Pseudo-lock region creation/removal core Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 30/38] x86/intel_rdt: Support creation/removal of pseudo-locked region Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 31/38] x86/intel_rdt: resctrl files reflect pseudo-locked information Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 32/38] x86/intel_rdt: Ensure RDT cleanup on exit Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 33/38] x86/intel_rdt: Create resctrl debug area Reinette Chatre
2018-05-22 11:29 ` Reinette Chatre [this message]
2018-05-22 19:43   ` [PATCH V4 34/38] x86/intel_rdt: Create debugfs files for pseudo-locking testing Greg KH
2018-05-22 21:02     ` Reinette Chatre
2018-05-23  8:05       ` Greg KH
2018-05-23 17:19         ` Reinette Chatre
2018-05-23 17:27           ` Greg KH
2018-05-22 11:29 ` [PATCH V4 35/38] x86/intel_rdt: Create character device exposing pseudo-locked region Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 36/38] x86/intel_rdt: More precise L2 hit/miss measurements Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 37/38] x86/intel_rdt: Support L3 cache performance event of Broadwell Reinette Chatre
2018-05-22 11:29 ` [PATCH V4 38/38] x86/intel_rdt: Limit C-states dynamically when pseudo-locking active Reinette Chatre

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2da8730575c589eb7303c7b18a2721da40c446e2.1526987654.git.reinette.chatre@intel.com \
    --to=reinette.chatre@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=fenghua.yu@intel.com \
    --cc=gavin.hindman@intel.com \
    --cc=hpa@zytor.com \
    --cc=jithu.joseph@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=tony.luck@intel.com \
    --cc=vikas.shivappa@linux.intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).