From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org
Cc: Ingo Molnar <mingo@elte.hu>,
	Andrew Morton <akpm@linux-foundation.org>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	Thomas Gleixner <tglx@linutronix.de>
Subject: [PATCH 07/20] tracing: Have dynamic size event stack traces
Date: Sat, 16 Jul 2011 07:22:07 -0400
Message-ID: <20110716112253.048577160@goodmis.org>
In-Reply-To: 20110716112200.096203519@goodmis.org


From: Steven Rostedt <srostedt@redhat.com>

Currently the stack trace per event in ftrace is only 8 frames.
This can be quite limiting and sometimes useless, especially when
the "ignore frames" count is wrong and we also use up stack frames
for the event processing itself.

Change this to be dynamic by adding a percpu buffer that we can
write a large stack trace into and then copy into the ring buffer.

Interrupts and NMIs that come in while another event is being
processed will only get to use the 8-frame stack. That should be
enough, as the task that was interrupted will have the full stack
trace anyway.
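
For illustration only (not part of the patch), here is a minimal
userspace sketch of the reserve-counter pattern the change relies on.
The names and buffer sizes are made up, and the kernel-specific pieces
(preempt_disable_notrace(), per-CPU variables, save_stack_trace()) are
reduced to comments:

  #include <stdio.h>
  #include <string.h>

  #define BIG_ENTRIES   1024      /* stands in for FTRACE_STACK_MAX_ENTRIES */
  #define SMALL_ENTRIES 8         /* stands in for FTRACE_STACK_ENTRIES     */

  static unsigned long scratch[BIG_ENTRIES];  /* stands in for ftrace_stack */
  static int reserve;               /* stands in for ftrace_stack_reserve   */

  #define barrier() asm volatile("" ::: "memory")  /* compiler barrier only */

  static void record_stack(void)
  {
  	unsigned long small[SMALL_ENTRIES];
  	unsigned long *buf = small;
  	int max = SMALL_ENTRIES;
  	int use_stack;

  	/* preempt_disable_notrace() would go here in the kernel */
  	use_stack = ++reserve;
  	barrier();		/* keep the compiler from reordering */

  	if (use_stack == 1) {
  		/* first user on this CPU: take the big scratch buffer */
  		buf = scratch;
  		max = BIG_ENTRIES;
  	}
  	/*
  	 * A nested interrupt/NMI entering here would see reserve > 1
  	 * and keep the small on-stack buffer, matching the fallback to
  	 * the 8-frame stack described above.
  	 */

  	/* stand-in for save_stack_trace(): capture up to 'max' entries */
  	memset(buf, 0, max * sizeof(*buf));

  	/* ... copy only the entries actually used into the ring buffer ... */

  	barrier();
  	reserve--;
  	/* preempt_enable_notrace() would go here in the kernel */
  }

  int main(void)
  {
  	record_stack();
  	printf("reserve back to %d\n", reserve);
  	return 0;
  }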

Requested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |    1 +
 kernel/trace/trace.c         |   92 ++++++++++++++++++++++++++++++++++++------
 kernel/trace/trace_entries.h |    3 +-
 kernel/trace/trace_output.c  |   11 +++--
 4 files changed, 88 insertions(+), 19 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index b1e69ee..96efa67 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -76,6 +76,7 @@ struct trace_iterator {
 	struct trace_entry	*ent;
 	unsigned long		lost_events;
 	int			leftover;
+	int			ent_size;
 	int			cpu;
 	u64			ts;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d9c1612..e5df02c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1248,6 +1248,15 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
 }
 
 #ifdef CONFIG_STACKTRACE
+
+#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
+struct ftrace_stack {
+	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
+};
+
+static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
+static DEFINE_PER_CPU(int, ftrace_stack_reserve);
+
 static void __ftrace_trace_stack(struct ring_buffer *buffer,
 				 unsigned long flags,
 				 int skip, int pc, struct pt_regs *regs)
@@ -1256,25 +1265,77 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 	struct stack_entry *entry;
 	struct stack_trace trace;
+	int use_stack;
+	int size = FTRACE_STACK_ENTRIES;
+
+	trace.nr_entries	= 0;
+	trace.skip		= skip;
+
+	/*
+	 * Since events can happen in NMIs there's no safe way to
+	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
+	 * or NMI comes in, it will just have to use the default
+	 * FTRACE_STACK_SIZE.
+	 */
+	preempt_disable_notrace();
+
+	use_stack = ++__get_cpu_var(ftrace_stack_reserve);
+	/*
+	 * We don't need any atomic variables, just a barrier.
+	 * If an interrupt comes in, we don't care, because it would
+	 * have exited and put the counter back to what we want.
+	 * We just need a barrier to keep gcc from moving things
+	 * around.
+	 */
+	barrier();
+	if (use_stack == 1) {
+		trace.entries		= &__get_cpu_var(ftrace_stack).calls[0];
+		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
+
+		if (regs)
+			save_stack_trace_regs(regs, &trace);
+		else
+			save_stack_trace(&trace);
+
+		if (trace.nr_entries > size)
+			size = trace.nr_entries;
+	} else
+		/* From now on, use_stack is a boolean */
+		use_stack = 0;
+
+	size *= sizeof(unsigned long);
 
 	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
-					  sizeof(*entry), flags, pc);
+					  sizeof(*entry) + size, flags, pc);
 	if (!event)
-		return;
-	entry	= ring_buffer_event_data(event);
-	memset(&entry->caller, 0, sizeof(entry->caller));
+		goto out;
+	entry = ring_buffer_event_data(event);
 
-	trace.nr_entries	= 0;
-	trace.max_entries	= FTRACE_STACK_ENTRIES;
-	trace.skip		= skip;
-	trace.entries		= entry->caller;
+	memset(&entry->caller, 0, size);
+
+	if (use_stack)
+		memcpy(&entry->caller, trace.entries,
+		       trace.nr_entries * sizeof(unsigned long));
+	else {
+		trace.max_entries	= FTRACE_STACK_ENTRIES;
+		trace.entries		= entry->caller;
+		if (regs)
+			save_stack_trace_regs(regs, &trace);
+		else
+			save_stack_trace(&trace);
+	}
+
+	entry->size = trace.nr_entries;
 
-	if (regs)
-		save_stack_trace_regs(regs, &trace);
-	else
-		save_stack_trace(&trace);
 	if (!filter_check_discard(call, entry, buffer, event))
 		ring_buffer_unlock_commit(buffer, event);
+
+ out:
+	/* Again, don't let gcc optimize things here */
+	barrier();
+	__get_cpu_var(ftrace_stack_reserve)--;
+	preempt_enable_notrace();
+
 }
 
 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
@@ -1562,7 +1623,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
 
 	ftrace_enable_cpu();
 
-	return event ? ring_buffer_event_data(event) : NULL;
+	if (event) {
+		iter->ent_size = ring_buffer_event_length(event);
+		return ring_buffer_event_data(event);
+	}
+	iter->ent_size = 0;
+	return NULL;
 }
 
 static struct trace_entry *
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e32744c..9336590 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -161,7 +161,8 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
 	TRACE_STACK,
 
 	F_STRUCT(
-		__array(	unsigned long,	caller, FTRACE_STACK_ENTRIES	)
+		__field(	int,		size	)
+		__dynamic_array(unsigned long,	caller	)
 	),
 
 	F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index e37de49..5199930 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1107,19 +1107,20 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 {
 	struct stack_entry *field;
 	struct trace_seq *s = &iter->seq;
-	int i;
+	unsigned long *p;
+	unsigned long *end;
 
 	trace_assign_type(field, iter->ent);
+	end = (unsigned long *)((long)iter->ent + iter->ent_size);
 
 	if (!trace_seq_puts(s, "<stack trace>\n"))
 		goto partial;
-	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
-		if (!field->caller[i] || (field->caller[i] == ULONG_MAX))
-			break;
+
+	for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) {
 		if (!trace_seq_puts(s, " => "))
 			goto partial;
 
-		if (!seq_print_ip_sym(s, field->caller[i], flags))
+		if (!seq_print_ip_sym(s, *p, flags))
 			goto partial;
 		if (!trace_seq_puts(s, "\n"))
 			goto partial;
-- 
1.7.5.4


