* [PATCH 1/3] tracing: Introduce traces for major and minor page faults
2017-04-13 2:20 [PATCH 0/3] Add fault_major, fault_minor page fault trace events Chris Redmon
@ 2017-04-13 2:20 ` Chris Redmon
2017-04-13 2:20 ` [PATCH 2/3] arm: Utilize trace events " Chris Redmon
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Chris Redmon @ 2017-04-13 2:20 UTC (permalink / raw)
To: linux-arm-kernel
Tracing for major page faults is helpful, especially for diagnosing
realtime latency issues (such as a failure to mlock() something on a
realtime code path).
Signed-off-by: Chris Redmon <credmonster@gmail.com>
---
include/trace/events/fault.h | 87 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 87 insertions(+)
create mode 100644 include/trace/events/fault.h
diff --git a/include/trace/events/fault.h b/include/trace/events/fault.h
new file mode 100644
index 000000000000..c4803f859750
--- /dev/null
+++ b/include/trace/events/fault.h
@@ -0,0 +1,87 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM fault
+
+#if !defined(_TRACE_FAULT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FAULT_H
+
+#include <asm/ptrace.h>
+#include <linux/tracepoint.h>
+#include <linux/types.h>
+
+/*
+ * Shared class for fault_major/fault_minor: logs the fault address (raw hex,
+ * since it is a data address), the ip from @regs (0 if NULL), @status, the
+ * time in us since @start_time, and the @flags/@fault bitmasks by name.
+ */
+DECLARE_EVENT_CLASS(fault_major_minor_class,
+
+	TP_PROTO(unsigned long address, struct pt_regs *regs,
+		 unsigned long status, ktime_t start_time,
+		 unsigned int flags, unsigned int fault),
+
+	TP_ARGS(address, regs, status, start_time, flags, fault),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, address)
+		__field(unsigned long, instruction_pointer)
+		__field(unsigned long, status)
+		__field(unsigned int, duration)
+		__field(unsigned int, flags)
+		__field(unsigned int, fault)
+	),
+
+	TP_fast_assign(
+		__entry->address = address;
+		__entry->instruction_pointer = regs ? instruction_pointer(regs) : 0;
+		__entry->status = status;
+		__entry->duration = ktime_to_us(ktime_sub(ktime_get(), start_time));
+		__entry->flags = flags;
+		__entry->fault = fault;
+	),
+
+	TP_printk("address=0x%lx ip=%pS status=0x%lx duration=%uus flags=%s fault=%s",
+		__entry->address, (void *)__entry->instruction_pointer,
+		__entry->status, __entry->duration,
+		__print_flags(__entry->flags, "|",
+			{FAULT_FLAG_WRITE, "WRITE"},
+			{FAULT_FLAG_MKWRITE, "MKWRITE"},
+			{FAULT_FLAG_ALLOW_RETRY, "ALLOW_RETRY"},
+			{FAULT_FLAG_RETRY_NOWAIT, "RETRY_NOWAIT"},
+			{FAULT_FLAG_KILLABLE, "KILLABLE"},
+			{FAULT_FLAG_TRIED, "TRIED"},
+			{FAULT_FLAG_USER, "USER"}),
+		__print_flags(__entry->fault, "|",
+			{VM_FAULT_OOM, "OOM"},
+			{VM_FAULT_SIGBUS, "SIGBUS"},
+			{VM_FAULT_MAJOR, "MAJOR"},
+			{VM_FAULT_WRITE, "WRITE"},
+			{VM_FAULT_HWPOISON, "HWPOISON"},
+			{VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE"},
+			{VM_FAULT_NOPAGE, "NOPAGE"},
+			{VM_FAULT_LOCKED, "LOCKED"},
+			{VM_FAULT_RETRY, "RETRY"},
+			{VM_FAULT_FALLBACK, "FALLBACK"})
+	)
+);
+
+/*
+ * Instantiate one tracepoint of fault_major_minor_class named @name.
+ * No trailing ';' in the macro body: each use below supplies its own.
+ */
+#define DEFINE_PAGE_FAULT_MAJOR_MINOR_EVENT(name)			\
+DEFINE_EVENT(fault_major_minor_class, name,				\
+	TP_PROTO(unsigned long address, struct pt_regs *regs,		\
+		 unsigned long status, ktime_t start_time,		\
+		 unsigned int flags, unsigned int fault),		\
+	TP_ARGS(address, regs, status, start_time, flags, fault))
+
+/* Tracepoint for major page faults: */
+DEFINE_PAGE_FAULT_MAJOR_MINOR_EVENT(fault_major);
+
+/* Tracepoint for minor page faults: */
+DEFINE_PAGE_FAULT_MAJOR_MINOR_EVENT(fault_minor);
+
+#endif /* _TRACE_FAULT_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
--
2.12.2.599.gcf11a67
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/3] arm: Utilize trace events for major and minor page faults
2017-04-13 2:20 [PATCH 0/3] Add fault_major, fault_minor page fault trace events Chris Redmon
2017-04-13 2:20 ` [PATCH 1/3] tracing: Introduce traces for major and minor page faults Chris Redmon
@ 2017-04-13 2:20 ` Chris Redmon
2017-04-13 2:20 ` [PATCH 3/3] arm64: " Chris Redmon
2017-04-25 16:49 ` [PATCH 0/3] Add fault_major, fault_minor page fault trace events Will Deacon
3 siblings, 0 replies; 5+ messages in thread
From: Chris Redmon @ 2017-04-13 2:20 UTC (permalink / raw)
To: linux-arm-kernel
Signed-off-by: Chris Redmon <credmonster@gmail.com>
---
arch/arm/mm/fault.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index ff8b0aa2dfde..e59514f85e01 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -29,6 +29,9 @@
#include "fault.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/fault.h>
+
#ifdef CONFIG_MMU
#ifdef CONFIG_KPROBES
@@ -262,6 +265,8 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
struct mm_struct *mm;
int fault, sig, code;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ ktime_t start_time;
+ bool trace_fault_enabled = false;
if (notify_page_fault(regs, fsr))
return 0;
@@ -285,6 +290,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (fsr & FSR_WRITE)
flags |= FAULT_FLAG_WRITE;
+ if (trace_fault_major_enabled() || trace_fault_minor_enabled()) {
+ start_time = ktime_get();
+ trace_fault_enabled = true;
+ }
+
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
@@ -330,10 +340,14 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
tsk->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
regs, addr);
+ if (trace_fault_enabled)
+ trace_fault_major(addr, regs, fsr, start_time, flags, fault);
} else {
tsk->min_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
regs, addr);
+ if (trace_fault_enabled)
+ trace_fault_minor(addr, regs, fsr, start_time, flags, fault);
}
if (fault & VM_FAULT_RETRY) {
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
--
2.12.2.599.gcf11a67
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 3/3] arm64: Utilize trace events for major and minor page faults
2017-04-13 2:20 [PATCH 0/3] Add fault_major, fault_minor page fault trace events Chris Redmon
2017-04-13 2:20 ` [PATCH 1/3] tracing: Introduce traces for major and minor page faults Chris Redmon
2017-04-13 2:20 ` [PATCH 2/3] arm: Utilize trace events " Chris Redmon
@ 2017-04-13 2:20 ` Chris Redmon
2017-04-25 16:49 ` [PATCH 0/3] Add fault_major, fault_minor page fault trace events Will Deacon
3 siblings, 0 replies; 5+ messages in thread
From: Chris Redmon @ 2017-04-13 2:20 UTC (permalink / raw)
To: linux-arm-kernel
Signed-off-by: Chris Redmon <credmonster@gmail.com>
---
arch/arm64/mm/fault.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 1b35b8bddbfb..d3097ff6fb8e 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -42,6 +42,9 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/fault.h>
+
struct fault_info {
int (*fn)(unsigned long addr, unsigned int esr,
struct pt_regs *regs);
@@ -315,6 +318,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
int fault, sig, code;
unsigned long vm_flags = VM_READ | VM_WRITE;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ ktime_t start_time;
+ bool trace_fault_enabled = false;
if (notify_page_fault(regs, esr))
return 0;
@@ -351,6 +356,11 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
die("Accessing user space memory outside uaccess.h routines", regs, esr);
}
+ if (trace_fault_major_enabled() || trace_fault_minor_enabled()) {
+ start_time = ktime_get();
+ trace_fault_enabled = true;
+ }
+
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
@@ -395,10 +405,14 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
tsk->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
addr);
+ if (trace_fault_enabled)
+ trace_fault_major(addr, regs, esr, start_time, mm_flags, fault);
} else {
tsk->min_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
addr);
+ if (trace_fault_enabled)
+ trace_fault_minor(addr, regs, esr, start_time, mm_flags, fault);
}
if (fault & VM_FAULT_RETRY) {
/*
--
2.12.2.599.gcf11a67
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 0/3] Add fault_major, fault_minor page fault trace events
2017-04-13 2:20 [PATCH 0/3] Add fault_major, fault_minor page fault trace events Chris Redmon
` (2 preceding siblings ...)
2017-04-13 2:20 ` [PATCH 3/3] arm64: " Chris Redmon
@ 2017-04-25 16:49 ` Will Deacon
3 siblings, 0 replies; 5+ messages in thread
From: Will Deacon @ 2017-04-25 16:49 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Apr 12, 2017 at 10:20:37PM -0400, Chris Redmon wrote:
> These changes add common trace events for major and minor page faults,
> as well as adding these traces to the arm and arm64 architectures. These
> traces offer useful information for determining the source of page faults
> in realtime systems, as well as the time penalty for taking a major or
> minor page fault.
>
> I made an attempt to minimize the overhead when these tracepoints are not
> enabled, but I'm willing to make more changes if desired.
Why is this necessary, given that we already report these types of fault
as perf sw events?
Will
^ permalink raw reply [flat|nested] 5+ messages in thread