All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1 1/9] early pt: Basic support for early intel processor trace
@ 2017-09-30  3:22 Luming Yu
  0 siblings, 0 replies; only message in thread
From: Luming Yu @ 2017-09-30  3:22 UTC (permalink / raw)
  To: LKML

[-- Attachment #1: Type: text/plain, Size: 483 bytes --]

With zero dependencies on other technologies in the Linux kernel:
1. Per-CPU dump for basic-block-level code analysis.
2. It can trace any code, including the tracer itself, right after it is enabled.

Signed-off-by: Luming Yu <luming.yu@intel.com>
---
 arch/x86/events/Kconfig          |   6 +
 arch/x86/events/intel/Makefile   |   1 +
 arch/x86/events/intel/early_pt.c | 337 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 344 insertions(+)
 create mode 100644 arch/x86/events/intel/early_pt.c

[-- Attachment #2: 0001-Basic-support-for-early-intel-processor-trace-featur.patch --]
[-- Type: application/octet-stream, Size: 10045 bytes --]

From cc40bf9273f68814be59a3c2f908d9b3e34f4f22 Mon Sep 17 00:00:00 2001
From: Luming Yu <luming.yu@intel.com>
Date: Fri, 29 Sep 2017 21:50:11 +0800
Subject: [PATCH v1 1/9] early pt: Basic support for early intel processor trace 

With zero dependencies on other technologies in the Linux kernel:
1. Per-CPU dump for basic-block-level code analysis.
2. It can trace any code, including the tracer itself, right after it is enabled.

Signed-off-by: Luming Yu <luming.yu@intel.com>
---
 arch/x86/events/Kconfig          |   6 +
 arch/x86/events/intel/Makefile   |   1 +
 arch/x86/events/intel/early_pt.c | 337 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 344 insertions(+)
 create mode 100644 arch/x86/events/intel/early_pt.c

diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig
index 98397db..4205918 100644
--- a/arch/x86/events/Kconfig
+++ b/arch/x86/events/Kconfig
@@ -32,5 +32,11 @@ config PERF_EVENTS_AMD_POWER
 	  Currently, it leverages X86_FEATURE_ACC_POWER
 	  (CPUID Fn8000_0007_EDX[12]) interface to calculate the
 	  average power consumption on Family 15h processors.
+config EARLY_PT
+	tristate "Intel early PT"
+	depends on CPU_SUP_INTEL
+	default n
+	---help---
+	  Early PT support.
 
 endmenu
diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile
index e9d8520..d04d8ec 100644
--- a/arch/x86/events/intel/Makefile
+++ b/arch/x86/events/intel/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_CPU_SUP_INTEL)		+= core.o bts.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= ds.o knc.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= lbr.o p4.o p6.o pt.o
+obj-$(CONFIG_EARLY_PT)			+= early_pt.o
 obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL)	+= intel-rapl-perf.o
 intel-rapl-perf-objs			:= rapl.o
 obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel-uncore.o
diff --git a/arch/x86/events/intel/early_pt.c b/arch/x86/events/intel/early_pt.c
new file mode 100644
index 0000000..67513ed
--- /dev/null
+++ b/arch/x86/events/intel/early_pt.c
@@ -0,0 +1,337 @@
+/*I can trace myself !*/
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/gfp.h>
+#include <linux/uaccess.h>
+#include <asm/msr.h>
+
+#include "../perf_event.h"
+#include "pt.h"
+
+/* ioctl command */
+#define PT_SET_CPU	9901
+#define PT_GET_SIZE	9902
+#define PT_GET_OFFSET	9903
+#define PT_STOP		9904
+#define PT_START	9905
+
+#define PT_ERROR	BIT_ULL(4)
+#define MTC_MASK	(0xf << 14)
+#define CYC_MASK	(0xf << 19)
+#define PSB_MASK	(0xf << 24)
+
+#define ADDR0_SHIFT	32
+#define ADDR1_SHIFT	36
+#define ADDR0_MASK	(0x1ULL << ADDR0_SHIFT)
+#define ADDR1_MASK	(0x1ULL << ADDR1_SHIFT)
+
+#define MSR_IA32_CR3_MATCH	0x00000572
+
+#define CYC_EN		BIT_ULL(1)
+#define MTC_EN		BIT_ULL(9)
+
+#define TOPA_STOP	BIT_ULL(4)
+#define TOPA_INT	BIT_ULL(2)
+#define TOPA_END	BIT_ULL(0)
+#define TOPA_SIZE_SHIFT 6
+
+static int	early_pt_buffer_order = 9;
+static bool	single_range = true;
+
+static DEFINE_PER_CPU(bool, early_pt_running);
+static DEFINE_PER_CPU(u64, pt_offset);
+static DEFINE_PER_CPU(u64, pt_buffer_cpu);
+
+static bool	has_cr3_match;
+static unsigned	addr_cfg_max;
+static int	pt_num_buffers;
+static unsigned psb_freq_mask = 0;
+static unsigned cyc_thresh_mask = 0;
+static unsigned mtc_freq_mask = 0;
+static unsigned addr_range_num = 0;
+
+/*
+ * Allocate (or reuse and clear) @cpu's trace output buffer.
+ * The buffer is 2^early_pt_buffer_order contiguous pages, zeroed.
+ * Returns 0 on success, -ENOMEM if allocation fails.
+ */
+static int early_pt_buffer_init(int cpu)
+{
+	u64	pt_buffer;
+
+	pt_buffer = per_cpu(pt_buffer_cpu, cpu);
+
+	if (!pt_buffer) {
+		pt_buffer = __get_free_pages(GFP_KERNEL |
+			__GFP_NOWARN | __GFP_ZERO, early_pt_buffer_order);
+		if (!pt_buffer) {
+			pr_err("cpu %d, cannot allocate %ld KB buffer\n", cpu,
+					(PAGE_SIZE << early_pt_buffer_order) / 1024);
+			return -ENOMEM;
+		}
+		per_cpu(pt_buffer_cpu, cpu) = pt_buffer;
+	} else
+		/* Already allocated by an earlier start: just clear it. */
+		memset((void *)pt_buffer, 0, PAGE_SIZE << early_pt_buffer_order);
+
+	return 0;
+}
+
+/*
+ * Free the calling cpu's trace buffer, if any.  Runs on every cpu via
+ * on_each_cpu() with @arg == NULL, so the cpu id must be derived
+ * locally: the old `(long)arg` cast always yielded cpu 0, making every
+ * cpu try to free cpu 0's buffer and leaking all the others.
+ */
+static void early_pt_buffer_exit(void *arg)
+{
+	int	cpu = raw_smp_processor_id();
+
+	if (per_cpu(pt_buffer_cpu, cpu)) {
+		free_pages(per_cpu(pt_buffer_cpu, cpu), early_pt_buffer_order);
+		per_cpu(pt_buffer_cpu, cpu) = 0;
+	}
+}
+
+/*
+ * cpuid Intel PT detection and Caps enumeration.
+ * Returns 0 when PT with ToPA output is present, -ENODEV otherwise.
+ * Side effects: fills has_cr3_match, addr_cfg_max, pt_num_buffers and
+ * the mtc/cyc/psb frequency masks from CPUID leaves 0x07 and 0x14.
+ */
+static int early_pt_cpuid_caps(void)
+{
+	unsigned a, b, c, d;
+	unsigned a1, b1, c1, d1;
+
+	/*
+	 * cpuid func: 0x14 for PT
+	 */
+	cpuid(0, &a, &b, &c, &d);
+	if (a < 0x14) {
+		pr_info("No cpuid func 0x14 (for PT) available\n");
+		return -ENODEV;
+	}
+	/* Leaf 0x07 EBX bit 25: Intel PT feature flag. */
+	cpuid_count(0x07, 0, &a, &b, &c, &d);
+	if ((b & BIT(25)) == 0) {
+		pr_info("No PT available\n");
+		return -ENODEV;
+	}
+	/* Leaf 0x14 ECX bit 0: ToPA output scheme supported. */
+	cpuid_count(0x14, 0, &a, &b, &c, &d);
+	if ((c & BIT(0)) == 0) {
+		pr_info("No ToPA available\n");
+		return -ENODEV;
+	}
+	has_cr3_match = !!(b & BIT(0));
+	if (b & BIT(2))
+		addr_cfg_max = 2;
+	/*
+	 * NOTE(review): pt_num_buffers is only set (to 1) when multiple
+	 * ToPA entries are NOT supported; in the multi-entry case it is
+	 * left at 0 and min_t() below clamps to 0 — confirm intended.
+	 */
+	if (!(c & BIT(1)))
+		pt_num_buffers = 1;
+	pt_num_buffers = min_t(unsigned, pt_num_buffers,
+				(PAGE_SIZE / 8) - 1);
+	a1 = b1 = c1 = d1 = 0;
+	if (a >= 1)
+		cpuid_count(0x14, 1, &a1, &b1, &c1, &d1);
+	if (b & BIT(1)) {
+		mtc_freq_mask = (a1 >> 16) & 0xffff;
+		cyc_thresh_mask = b1 & 0xffff;
+		psb_freq_mask = (b1 >> 16) & 0xffff;
+		addr_range_num = a1 & 0x3;
+	}
+	return 0;
+}
+/*
+ * (Re)program and enable PT on the calling cpu, tracing into its
+ * per-cpu output buffer.  Invoked via on_each_cpu() at init and via
+ * smp_call_on_cpu() from the PT_START ioctl.
+ * Returns 0 on success, negative on failure; early_pt_running for
+ * this cpu reflects the outcome either way.
+ */
+static int start_early_pt(void *arg)
+{
+	u64 val;
+	int cpu;
+
+	if (rdmsrl_safe(MSR_IA32_RTIT_CTL, &val) < 0) {
+		pr_info("start_early_pt: failed\n");
+		return -1;
+	}
+
+	cpu = raw_smp_processor_id();
+	/* Without a buffer we would program OUTPUT_BASE with __pa(0). */
+	if (early_pt_buffer_init(cpu) < 0) {
+		__this_cpu_write(early_pt_running, false);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Q: How should we handle PT is already enabled?
+	 * A: disable and re-enable.
+	 */
+	if (val & RTIT_CTL_TRACEEN)
+		wrmsrl_safe(MSR_IA32_RTIT_CTL, val & ~RTIT_CTL_TRACEEN);
+
+	if (wrmsrl_safe(MSR_IA32_RTIT_OUTPUT_BASE, __pa(__this_cpu_read(pt_buffer_cpu))) < 0) {
+		pr_info(" wrmsrl output base failed \n");
+		__this_cpu_write(early_pt_running, false);
+		return -1;
+	}
+
+	/* Single-range mode: the mask holds buffer-size minus one. */
+	if (single_range)
+		wrmsrl_safe(MSR_IA32_RTIT_OUTPUT_MASK,
+			((1ULL << (PAGE_SHIFT + early_pt_buffer_order)) - 1));
+	else
+		wrmsrl_safe(MSR_IA32_RTIT_OUTPUT_MASK, 0ULL);
+	wrmsrl_safe(MSR_IA32_RTIT_STATUS, 0ULL);
+
+	/* Clear every field we own before applying the new config. */
+	val &= ~(RTIT_CTL_TSC_EN | RTIT_CTL_OS | RTIT_CTL_USR | RTIT_CTL_CR3EN |
+		RTIT_CTL_DISRETC | RTIT_CTL_TOPA | RTIT_CTL_CYCLEACC |
+		RTIT_CTL_TRACEEN | RTIT_CTL_BRANCH_EN
+		| RTIT_CTL_MTC_EN | MTC_MASK | CYC_MASK | PSB_MASK | ADDR0_MASK |
+		ADDR1_MASK);
+
+	/* enable trace */
+	val |= RTIT_CTL_TRACEEN;
+	val |= RTIT_CTL_BRANCH_EN;
+	val |= RTIT_CTL_TSC_EN;
+	val |= RTIT_CTL_OS;
+	val |= RTIT_CTL_USR;
+
+	if (wrmsrl_safe(MSR_IA32_RTIT_CTL, val) < 0) {
+		pr_info("early_pt start failed on cpu[%d]\n", cpu);
+		__this_cpu_write(early_pt_running, false);
+		return -1;
+	}
+
+	pr_info("early_pt started on cpu[%d]\n", cpu);
+	__this_cpu_write(early_pt_running, true);
+	return 0;
+}
+
+/* on_each_cpu() callback: void wrapper around start_early_pt(). */
+static void start_pt_no_return(void *arg)
+{
+	(void)start_early_pt(arg);
+}
+
+static struct miscdevice early_pt_miscdev;
+
+/* Probe PT capabilities, then bring tracing up on every cpu. */
+static int early_pt_init(void)
+{
+	int ret = early_pt_cpuid_caps();
+
+	if (ret < 0) {
+		pr_info("early_pt_init: no feature available\n");
+		return ret;
+	}
+
+	on_each_cpu(start_pt_no_return, NULL, 0);
+	return 0;
+}
+/*
+ * Register the user-space control device once every online cpu has
+ * tracing up and running.
+ */
+static int late_pt_init(void)
+{
+	unsigned int cpu;
+	int ret;
+
+	for_each_online_cpu(cpu) {
+		if (per_cpu(early_pt_running, cpu))
+			continue;
+		pr_err("late_pt_init: failed, early pt not running\n");
+		return -ENODEV;
+	}
+
+	ret = misc_register(&early_pt_miscdev);
+	if (ret < 0) {
+		pr_err("misc_register early_pt_miscdev failed\n");
+		return ret;
+	}
+
+	return 0;
+}
+/*
+ * Disable PT on the calling cpu and record how far the buffer was
+ * filled (OUTPUT_MASK bits 63:32) in this cpu's pt_offset.
+ * Returns 0 on success, -1 if tracing was not running here.
+ */
+static int stop_early_pt(void *arg)
+{
+	u64 offset;
+	u64 ctl, status;
+	int cpu;
+
+	cpu = raw_smp_processor_id();
+
+	if (!__this_cpu_read(early_pt_running))
+		return -1;
+	rdmsrl_safe(MSR_IA32_RTIT_CTL, &ctl);
+	rdmsrl_safe(MSR_IA32_RTIT_STATUS, &status);
+	if (!(ctl & RTIT_CTL_TRACEEN))
+		pr_debug("cpu %d, PT not enabled on stop, ctl %llx, status %llx\n",
+			cpu, ctl, status);
+	if (status & PT_ERROR) {
+		pr_info("cpu %d, error: status %llx\n", cpu, status);
+		/* Clear the sticky error before tearing trace down. */
+		wrmsrl_safe(MSR_IA32_RTIT_STATUS, 0ULL);
+	}
+	wrmsrl_safe(MSR_IA32_RTIT_CTL, 0ULL);
+	rdmsrl_safe(MSR_IA32_RTIT_OUTPUT_MASK, &offset);
+
+	__this_cpu_write(pt_offset, (offset >> 32));
+	__this_cpu_write(early_pt_running, false);
+	pr_info("early_pt stopped on cpu[%d]\n", cpu);
+	return 0;
+}
+/* on_each_cpu() callback: void wrapper around stop_early_pt(). */
+static void stop_pt_no_return(void *arg)
+{
+	(void)stop_early_pt(arg);
+}
+/*
+ * Stop tracing and release the trace buffers on every cpu.
+ * NOTE(review): early_pt_buffer_exit() derives the cpu from its arg,
+ * which is NULL here (i.e. cpu 0) — verify that buffers on all cpus
+ * are actually freed.
+ */
+static void early_pt_exit(void)
+{
+	on_each_cpu(stop_pt_no_return, NULL, 0);
+	on_each_cpu(early_pt_buffer_exit, NULL, 0);
+}
+/* Start tracing as early as possible; register the device once misc is up. */
+core_initcall(early_pt_init);
+late_initcall(late_pt_init);
+
+/*
+ * Map the selected cpu's trace buffer read-only into user space.
+ * The cpu is chosen beforehand via the PT_SET_CPU ioctl and stashed
+ * in file->private_data.
+ */
+static int early_pt_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	unsigned long len = vma->vm_end - vma->vm_start;
+	int cpu = (long) file->private_data;
+	unsigned long buffer_size = PAGE_SIZE << early_pt_buffer_order;
+
+	vma->vm_flags &= ~VM_MAYWRITE;
+
+	/* Whole pages, from the start of the buffer, within its bounds. */
+	if (len % PAGE_SIZE || vma->vm_pgoff || len > buffer_size)
+		return -EINVAL;
+
+	if (vma->vm_flags & VM_WRITE)
+		return -EPERM;
+
+	/*
+	 * Map only the requested length: the old code always passed
+	 * buffer_size, writing PTEs past the vma for shorter mappings.
+	 */
+	return remap_pfn_range(vma, vma->vm_start,
+			__pa(per_cpu(pt_buffer_cpu, cpu)) >> PAGE_SHIFT,
+			len,
+			vma->vm_page_prot);
+}
+
+/*
+ * Control interface:
+ *   PT_SET_CPU    select the cpu this fd operates on
+ *   PT_GET_SIZE   write the per-cpu buffer size (bytes) to *arg
+ *   PT_GET_OFFSET stop tracing on the cpu and write its fill offset
+ *   PT_STOP       stop tracing and free buffers on all cpus
+ *   PT_START      (re)start tracing on the selected cpu
+ */
+static long early_pt_ioctl(struct file *file, unsigned int cmd,
+			unsigned long arg)
+{
+	unsigned long cpu;
+
+	switch (cmd) {
+	case PT_SET_CPU: {
+		cpu = arg;
+		if (cpu >= NR_CPUS || !cpu_online(cpu))
+			return -EINVAL;
+		file->private_data = (void *)cpu;
+		return 0;
+	}
+	case PT_GET_SIZE:
+		return put_user((PAGE_SIZE << early_pt_buffer_order),
+				(int *)arg);
+	case PT_GET_OFFSET: {
+		unsigned offset;
+		cpu = (unsigned long) file->private_data;
+		/* Tracing must stop for pt_offset to be recorded. */
+		smp_call_on_cpu(cpu, stop_early_pt, NULL, true);
+		offset = per_cpu(pt_offset, cpu);
+		return put_user(offset, (int *)arg);
+	}
+	case PT_STOP:
+		early_pt_exit();
+		return 0;
+	case PT_START:
+		cpu = (unsigned long) file->private_data;
+		smp_call_on_cpu(cpu, start_early_pt, NULL, true);
+		return 0;
+	default:
+		return -ENOTTY;
+	}
+}
+
+/* File operations for the control device (mmap + ioctl). */
+static const struct file_operations early_pt_fops = {
+	.owner = THIS_MODULE,	/* EARLY_PT is tristate: pin the module while a fd is open */
+	.mmap = early_pt_mmap,
+	.unlocked_ioctl = early_pt_ioctl,
+	.llseek = noop_llseek,
+};
+
+/* Misc char device exposing the PT control interface as /dev/simple-pt. */
+static struct miscdevice early_pt_miscdev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name  = "simple-pt",
+	.fops  = &early_pt_fops,
+};
+
-- 
2.7.5


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2017-09-30  3:22 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-30  3:22 [PATCH v1 1/9] early pt: Basic support for early intel processor trace Luming Yu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.