linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [GIT PULL] tracing fixes
@ 2010-01-16 16:57 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2010-01-16 16:57 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt,
	=?unknown-8bit?B?RnLDqWTDqXJpYw==?= Weisbecker, Andrew Morton

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
David Sharp (1):
      ring-buffer: Wrap a list.next reference with rb_list_head()

Jan Kiszka (1):
      tracing/x86: Derive arch from bits argument in recordmcount.pl

Li Zefan (7):
      ftrace: Fix MATCH_END_ONLY function filter
      tracing/filters: Fix MATCH_FRONT_ONLY filter matching
      tracing/filters: Fix MATCH_END_ONLY filter matching
      lib: Introduce strnstr()
      tracing/filters: Fix MATCH_MIDDLE_ONLY filter matching
      tracing/filters: Fix MATCH_FULL filter matching for PTR_STRING
      tracing/filters: Add comment for match callbacks

Steven Rostedt (1):
      ring-buffer: Add rb_list_head() wrapper around new reader page next field


 include/linux/string.h             |    5 ++++-
 kernel/trace/ftrace.c              |    6 +++---
 kernel/trace/ring_buffer.c         |    4 ++--
 kernel/trace/trace_events_filter.c |   29 ++++++++++++++++++++---------
 lib/string.c                       |   27 ++++++++++++++++++++++++++-
 scripts/recordmcount.pl            |    2 +-
 6 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/include/linux/string.h b/include/linux/string.h
index 651839a..a716ee2 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -72,7 +72,10 @@ static inline __must_check char *strstrip(char *str)
 }
 
 #ifndef __HAVE_ARCH_STRSTR
-extern char * strstr(const char *,const char *);
+extern char * strstr(const char *, const char *);
+#endif
+#ifndef __HAVE_ARCH_STRNSTR
+extern char * strnstr(const char *, const char *, size_t);
 #endif
 #ifndef __HAVE_ARCH_STRLEN
 extern __kernel_size_t strlen(const char *);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7968762..1e6640f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1690,7 +1690,7 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
 static int ftrace_match(char *str, char *regex, int len, int type)
 {
 	int matched = 0;
-	char *ptr;
+	int slen;
 
 	switch (type) {
 	case MATCH_FULL:
@@ -1706,8 +1706,8 @@ static int ftrace_match(char *str, char *regex, int len, int type)
 			matched = 1;
 		break;
 	case MATCH_END_ONLY:
-		ptr = strstr(str, regex);
-		if (ptr && (ptr[len] == 0))
+		slen = strlen(str);
+		if (slen >= len && memcmp(str + slen - len, regex, len) == 0)
 			matched = 1;
 		break;
 	}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2326b04..edefe3b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2869,7 +2869,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	 * Splice the empty reader page into the list around the head.
 	 */
 	reader = rb_set_head_page(cpu_buffer);
-	cpu_buffer->reader_page->list.next = reader->list.next;
+	cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
 	cpu_buffer->reader_page->list.prev = reader->list.prev;
 
 	/*
@@ -2906,7 +2906,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	 *
 	 * Now make the new head point back to the reader page.
 	 */
-	reader->list.next->prev = &cpu_buffer->reader_page->list;
+	rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
 	rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
 
 	/* Finally update the reader page to the new head */
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 50504cb..e42af9a 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -211,8 +211,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
 {
 	char **addr = (char **)(event + pred->offset);
 	int cmp, match;
+	int len = strlen(*addr) + 1;	/* including tailing '\0' */
 
-	cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len);
+	cmp = pred->regex.match(*addr, &pred->regex, len);
 
 	match = cmp ^ pred->not;
 
@@ -251,7 +252,18 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
 	return 0;
 }
 
-/* Basic regex callbacks */
+/*
+ * regex_match_foo - Basic regex callbacks
+ *
+ * @str: the string to be searched
+ * @r:   the regex structure containing the pattern string
+ * @len: the length of the string to be searched (including '\0')
+ *
+ * Note:
+ * - @str might not be NULL-terminated if it's of type DYN_STRING
+ *   or STATIC_STRING
+ */
+
 static int regex_match_full(char *str, struct regex *r, int len)
 {
 	if (strncmp(str, r->pattern, len) == 0)
@@ -261,23 +273,24 @@ static int regex_match_full(char *str, struct regex *r, int len)
 
 static int regex_match_front(char *str, struct regex *r, int len)
 {
-	if (strncmp(str, r->pattern, len) == 0)
+	if (strncmp(str, r->pattern, r->len) == 0)
 		return 1;
 	return 0;
 }
 
 static int regex_match_middle(char *str, struct regex *r, int len)
 {
-	if (strstr(str, r->pattern))
+	if (strnstr(str, r->pattern, len))
 		return 1;
 	return 0;
 }
 
 static int regex_match_end(char *str, struct regex *r, int len)
 {
-	char *ptr = strstr(str, r->pattern);
+	int strlen = len - 1;
 
-	if (ptr && (ptr[r->len] == 0))
+	if (strlen >= r->len &&
+	    memcmp(str + strlen - r->len, r->pattern, r->len) == 0)
 		return 1;
 	return 0;
 }
@@ -781,10 +794,8 @@ static int filter_add_pred(struct filter_parse_state *ps,
 			pred->regex.field_len = field->size;
 		} else if (field->filter_type == FILTER_DYN_STRING)
 			fn = filter_pred_strloc;
-		else {
+		else
 			fn = filter_pred_pchar;
-			pred->regex.field_len = strlen(pred->regex.pattern);
-		}
 	} else {
 		if (field->is_signed)
 			ret = strict_strtoll(pred->regex.pattern, 0, &val);
diff --git a/lib/string.c b/lib/string.c
index 9f75b4e..a1cdcfc 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -667,7 +667,7 @@ EXPORT_SYMBOL(memscan);
  */
 char *strstr(const char *s1, const char *s2)
 {
-	int l1, l2;
+	size_t l1, l2;
 
 	l2 = strlen(s2);
 	if (!l2)
@@ -684,6 +684,31 @@ char *strstr(const char *s1, const char *s2)
 EXPORT_SYMBOL(strstr);
 #endif
 
+#ifndef __HAVE_ARCH_STRNSTR
+/**
+ * strnstr - Find the first substring in a length-limited string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ * @len: the maximum number of characters to search
+ */
+char *strnstr(const char *s1, const char *s2, size_t len)
+{
+	size_t l1 = len, l2;
+
+	l2 = strlen(s2);
+	if (!l2)
+		return (char *)s1;
+	while (l1 >= l2) {
+		l1--;
+		if (!memcmp(s1, s2, l2))
+			return (char *)s1;
+		s1++;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(strnstr);
+#endif
+
 #ifndef __HAVE_ARCH_MEMCHR
 /**
  * memchr - Find a character in an area of memory.
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 92f09fe..ea6f6e3 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -194,7 +194,7 @@ sub check_objcopy
     }
 }
 
-if ($arch eq "x86") {
+if ($arch =~ /(x86(_64)?)|(i386)/) {
     if ($bits == 64) {
 	$arch = "x86_64";
     } else {

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2010-04-04 10:09 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2010-04-04 10:09 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt, Frédéric Weisbecker,
	Andrew Morton

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Julia Lawall (1):
      ring-buffer: Add missing unlock

Li Zefan (1):
      tracing: Fix lockdep warning in global_clock()


 kernel/trace/ring_buffer.c |    8 +++++---
 kernel/trace/trace_clock.c |    4 ++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index d1187ef..9a0f9bf 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1209,18 +1209,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 
 	for (i = 0; i < nr_pages; i++) {
 		if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
-			return;
+			goto out;
 		p = cpu_buffer->pages->next;
 		bpage = list_entry(p, struct buffer_page, list);
 		list_del_init(&bpage->list);
 		free_buffer_page(bpage);
 	}
 	if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
-		return;
+		goto out;
 
 	rb_reset_cpu(cpu_buffer);
 	rb_check_pages(cpu_buffer);
 
+out:
 	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
@@ -1237,7 +1238,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
 	for (i = 0; i < nr_pages; i++) {
 		if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
-			return;
+			goto out;
 		p = pages->next;
 		bpage = list_entry(p, struct buffer_page, list);
 		list_del_init(&bpage->list);
@@ -1246,6 +1247,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	rb_reset_cpu(cpu_buffer);
 	rb_check_pages(cpu_buffer);
 
+out:
 	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 6fbfb8f..9d589d8 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -84,7 +84,7 @@ u64 notrace trace_clock_global(void)
 	int this_cpu;
 	u64 now;
 
-	raw_local_irq_save(flags);
+	local_irq_save(flags);
 
 	this_cpu = raw_smp_processor_id();
 	now = cpu_clock(this_cpu);
@@ -110,7 +110,7 @@ u64 notrace trace_clock_global(void)
 	arch_spin_unlock(&trace_clock_struct.lock);
 
  out:
-	raw_local_irq_restore(flags);
+	local_irq_restore(flags);
 
 	return now;
 }

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2010-03-13 16:33 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2010-03-13 16:33 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt, Fr??d??ric Weisbecker,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Arnaldo Carvalho de Melo (1):
      tracing: Update the comm field in the right variable in update_max_tr

Frederic Weisbecker (1):
      tracing: Include irqflags headers from trace clock

Lai Jiangshan (2):
      tracing: Fix warning in s_next of trace file ops
      ring-buffer: Move disabled check into preempt disable section

Steven Rostedt (6):
      function-graph: Fix unused reference to ftrace_set_func()
      function-graph: Use comment notation for func names of dangling '}'
      function-graph: Init curr_ret_stack with ret_stack
      tracing: Use same local variable when resetting the ring buffer
      tracing: Disable buffer switching when starting or stopping trace
      tracing: Do not record user stack trace from NMI context

Tim Bird (1):
      function-graph: Add tracing_thresh support to function_graph tracer


 kernel/trace/ftrace.c                |    8 ++---
 kernel/trace/ring_buffer.c           |   12 ++++----
 kernel/trace/trace.c                 |   49 ++++++++++++++++++++++++++++-----
 kernel/trace/trace.h                 |    3 +-
 kernel/trace/trace_clock.c           |    1 +
 kernel/trace/trace_functions_graph.c |   27 ++++++++++++++++--
 6 files changed, 77 insertions(+), 23 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8378357..bb53edb 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -84,10 +84,6 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
-#endif
-
 static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
 {
 	struct ftrace_ops *op = ftrace_list;
@@ -2276,6 +2272,8 @@ __setup("ftrace_filter=", set_ftrace_filter);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
+static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
+
 static int __init set_graph_function(char *str)
 {
 	strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
@@ -3351,6 +3349,7 @@ void ftrace_graph_init_task(struct task_struct *t)
 {
 	/* Make sure we do not use the parent ret_stack */
 	t->ret_stack = NULL;
+	t->curr_ret_stack = -1;
 
 	if (ftrace_graph_active) {
 		struct ftrace_ret_stack *ret_stack;
@@ -3360,7 +3359,6 @@ void ftrace_graph_init_task(struct task_struct *t)
 				GFP_KERNEL);
 		if (!ret_stack)
 			return;
-		t->curr_ret_stack = -1;
 		atomic_set(&t->tracing_graph_pause, 0);
 		atomic_set(&t->trace_overrun, 0);
 		t->ftrace_timestamp = 0;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8c1b2d2..54191d6 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2232,12 +2232,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return NULL;
 
-	if (atomic_read(&buffer->record_disabled))
-		return NULL;
-
 	/* If we are tracing schedule, we don't want to recurse */
 	resched = ftrace_preempt_disable();
 
+	if (atomic_read(&buffer->record_disabled))
+		goto out_nocheck;
+
 	if (trace_recursive_lock())
 		goto out_nocheck;
 
@@ -2469,11 +2469,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return -EBUSY;
 
-	if (atomic_read(&buffer->record_disabled))
-		return -EBUSY;
-
 	resched = ftrace_preempt_disable();
 
+	if (atomic_read(&buffer->record_disabled))
+		goto out;
+
 	cpu = raw_smp_processor_id();
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 032c57c..e52683f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -374,6 +374,21 @@ static int __init set_buf_size(char *str)
 }
 __setup("trace_buf_size=", set_buf_size);
 
+static int __init set_tracing_thresh(char *str)
+{
+	unsigned long threshhold;
+	int ret;
+
+	if (!str)
+		return 0;
+	ret = strict_strtoul(str, 0, &threshhold);
+	if (ret < 0)
+		return 0;
+	tracing_thresh = threshhold * 1000;
+	return 1;
+}
+__setup("tracing_thresh=", set_tracing_thresh);
+
 unsigned long nsecs_to_usecs(unsigned long nsecs)
 {
 	return nsecs / 1000;
@@ -579,9 +594,10 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
 static arch_spinlock_t ftrace_max_lock =
 	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 
+unsigned long __read_mostly	tracing_thresh;
+
 #ifdef CONFIG_TRACER_MAX_TRACE
 unsigned long __read_mostly	tracing_max_latency;
-unsigned long __read_mostly	tracing_thresh;
 
 /*
  * Copy the new maximum trace into the separate maximum-trace
@@ -592,7 +608,7 @@ static void
 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
 	struct trace_array_cpu *data = tr->data[cpu];
-	struct trace_array_cpu *max_data = tr->data[cpu];
+	struct trace_array_cpu *max_data;
 
 	max_tr.cpu = cpu;
 	max_tr.time_start = data->preempt_timestamp;
@@ -602,7 +618,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	max_data->critical_start = data->critical_start;
 	max_data->critical_end = data->critical_end;
 
-	memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
+	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
 	max_data->pid = tsk->pid;
 	max_data->uid = task_uid(tsk);
 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
@@ -824,10 +840,10 @@ out:
 	mutex_unlock(&trace_types_lock);
 }
 
-static void __tracing_reset(struct trace_array *tr, int cpu)
+static void __tracing_reset(struct ring_buffer *buffer, int cpu)
 {
 	ftrace_disable_cpu();
-	ring_buffer_reset_cpu(tr->buffer, cpu);
+	ring_buffer_reset_cpu(buffer, cpu);
 	ftrace_enable_cpu();
 }
 
@@ -839,7 +855,7 @@ void tracing_reset(struct trace_array *tr, int cpu)
 
 	/* Make sure all commits have finished */
 	synchronize_sched();
-	__tracing_reset(tr, cpu);
+	__tracing_reset(buffer, cpu);
 
 	ring_buffer_record_enable(buffer);
 }
@@ -857,7 +873,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
-		__tracing_reset(tr, cpu);
+		__tracing_reset(buffer, cpu);
 
 	ring_buffer_record_enable(buffer);
 }
@@ -934,6 +950,8 @@ void tracing_start(void)
 		goto out;
 	}
 
+	/* Prevent the buffers from switching */
+	arch_spin_lock(&ftrace_max_lock);
 
 	buffer = global_trace.buffer;
 	if (buffer)
@@ -943,6 +961,8 @@ void tracing_start(void)
 	if (buffer)
 		ring_buffer_record_enable(buffer);
 
+	arch_spin_unlock(&ftrace_max_lock);
+
 	ftrace_start();
  out:
 	spin_unlock_irqrestore(&tracing_start_lock, flags);
@@ -964,6 +984,9 @@ void tracing_stop(void)
 	if (trace_stop_count++)
 		goto out;
 
+	/* Prevent the buffers from switching */
+	arch_spin_lock(&ftrace_max_lock);
+
 	buffer = global_trace.buffer;
 	if (buffer)
 		ring_buffer_record_disable(buffer);
@@ -972,6 +995,8 @@ void tracing_stop(void)
 	if (buffer)
 		ring_buffer_record_disable(buffer);
 
+	arch_spin_unlock(&ftrace_max_lock);
+
  out:
 	spin_unlock_irqrestore(&tracing_start_lock, flags);
 }
@@ -1259,6 +1284,13 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 	if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
 		return;
 
+	/*
+	 * NMIs can not handle page faults, even with fix ups.
+	 * The save user stack can (and often does) fault.
+	 */
+	if (unlikely(in_nmi()))
+		return;
+
 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
 					  sizeof(*entry), flags, pc);
 	if (!event)
@@ -1703,6 +1735,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 
 		ftrace_enable_cpu();
 
+		iter->leftover = 0;
 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
 			;
 
@@ -4248,10 +4281,10 @@ static __init int tracer_init_debugfs(void)
 #ifdef CONFIG_TRACER_MAX_TRACE
 	trace_create_file("tracing_max_latency", 0644, d_tracer,
 			&tracing_max_latency, &tracing_max_lat_fops);
+#endif
 
 	trace_create_file("tracing_thresh", 0644, d_tracer,
 			&tracing_thresh, &tracing_max_lat_fops);
-#endif
 
 	trace_create_file("README", 0444, d_tracer,
 			NULL, &tracing_readme_fops);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fd05bca..1bc8cd1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -396,9 +396,10 @@ extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
 
 extern unsigned long nsecs_to_usecs(unsigned long nsecs);
 
+extern unsigned long tracing_thresh;
+
 #ifdef CONFIG_TRACER_MAX_TRACE
 extern unsigned long tracing_max_latency;
-extern unsigned long tracing_thresh;
 
 void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
 void update_max_tr_single(struct trace_array *tr,
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 84a3a7b..6fbfb8f 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -13,6 +13,7 @@
  * Tracer plugins will chose a default from these clocks.
  */
 #include <linux/spinlock.h>
+#include <linux/irqflags.h>
 #include <linux/hardirq.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index e998a82..e9df04b 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -237,6 +237,14 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
 	return ret;
 }
 
+int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
+{
+	if (tracing_thresh)
+		return 1;
+	else
+		return trace_graph_entry(trace);
+}
+
 static void __trace_graph_return(struct trace_array *tr,
 				struct ftrace_graph_ret *trace,
 				unsigned long flags,
@@ -290,13 +298,26 @@ void set_graph_array(struct trace_array *tr)
 	smp_mb();
 }
 
+void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
+{
+	if (tracing_thresh &&
+	    (trace->rettime - trace->calltime < tracing_thresh))
+		return;
+	else
+		trace_graph_return(trace);
+}
+
 static int graph_trace_init(struct trace_array *tr)
 {
 	int ret;
 
 	set_graph_array(tr);
-	ret = register_ftrace_graph(&trace_graph_return,
-				    &trace_graph_entry);
+	if (tracing_thresh)
+		ret = register_ftrace_graph(&trace_graph_thresh_return,
+					    &trace_graph_thresh_entry);
+	else
+		ret = register_ftrace_graph(&trace_graph_return,
+					    &trace_graph_entry);
 	if (ret)
 		return ret;
 	tracing_start_cmdline_record();
@@ -920,7 +941,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	} else {
-		ret = trace_seq_printf(s, "} (%ps)\n", (void *)trace->func);
+		ret = trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: [GIT PULL] tracing fixes
  2010-02-15  4:50   ` Ingo Molnar
@ 2010-02-15 16:57     ` Masami Hiramatsu
  0 siblings, 0 replies; 53+ messages in thread
From: Masami Hiramatsu @ 2010-02-15 16:57 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Steven Rostedt,
	Fr??d??ric Weisbecker, Andrew Morton, Arnaldo Carvalho de Melo

[-- Attachment #1: Type: text/plain, Size: 2106 bytes --]

Ingo Molnar wrote:
> 
> * Masami Hiramatsu <mhiramat@redhat.com> wrote:
> 
>> Hi Ingo,
>>
>> Could you push this commit too?
>>
>> 5ecaafdbf44b1ba400b746c60c401d54c7ee0863
>> kprobes: Add mcount to the kprobes blacklist
>>
>> Since this bug can be easily reproduced with kprobe-tracer.
> 
> Ok, i've cherry-picked it into urgent.
> 
> I'm wondering, how complete is the kprobes blacklist? You can test it by 
> putting a probe on every single function in the system via something like:
> 
>   for N in $(cut -d' ' -f3 /proc/kallsyms  | cut -f1); do
>     perf probe $N
>     perf probe -d $N
>   done

Yeah, something like that, and I used kprobe stress test tool which
I attached for that :)

 From http://lkml.org/lkml/2009/8/13/497
---
Usage
-----
   kprobestest [-s SYMLIST] [-b BLACKLIST] [-w WHITELIST]
      Run stress test. If SYMLIST file is specified, use it as
      an initial symbol list (This is useful for verifying white list
      after diagnosing all symbols).
   kprobestest cleanup
      Cleanup all lists


How to Work
-----------
This tool list up all symbols in the kernel via /proc/kallsyms, and sorts
it into groups (each of them including 64 symbols in default). And then,
it tests each group by using kprobe-tracer. If a kernel crash occurred,
that group is moved into 'failed' dir. If the group passed the test, this
script moves it into 'passed' dir and saves kprobe_profile into
'passed/profiles/'.
After testing all groups, all 'failed' groups are merged and sorted into
smaller groups (divided by 4, in default). And those are tested again.
This loop will be repeated until all group has just 1 symbol.
Finally, the script sorts all 'passed' symbols into 'tested', 'untested',
and 'missed' based on profiles.


Note
----
  - This script just gives us some clues to the blacklisted functions.
    In some cases, a combination of probe points will cause a problem, but
    each of them doesn't cause the problem alone.
---

Thank you,

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


[-- Attachment #2: kprobestest --]
[-- Type: text/plain, Size: 4955 bytes --]

#!/bin/bash
#
#  kprobestest: Kprobes stress test tool
#  Written by Masami Hiramatsu <mhiramat@redhat.com>
#
#  Usage:
#     $ kprobestest [-s SYMLIST] [-b BLACKLIST] [-w WHITELIST]
#        Run stress test. If SYMLIST file is specified, use it as 
#        an initial symbol list (This is useful for verifying white list
#        after diagnosing all symbols).
#
#     $ kprobestest cleanup
#        Cleanup all lists

DEBUGFS=/debug
INITNR=64
DIV=4
SYMFILE=syms.list
FAILFILE=black.list
FAILBKUP=1

function do_test () {
  # Do some benchmark
  for i in {1..4} ; do
  sleep 0.5
  echo -n "."
  done
}

function usage () {
  echo "Usage: kprobestest [cleanup] [-s SYMLIST] [-b BLACKLIST] [-w WHITELIST]"
  exit 0
}

function cleanup_test () {
  rm -rf $SYMFILE failed passed testing unset backup
  exit 0
}


# Parse arguments
WHITELIST=
BLACKLIST=
SYMLIST=

while [ "$1" ]; do
case $1 in
  cleanup)
    cleanup_test
    ;;
  run)
    # ignore
    ;;
  -s)
    SYMLIST=$2
    shift 1
    ;;
  -b)
    BLACKLIST=$2
    shift 1
    ;;
  -w)
    WHITELIST=$2
    shift 1
    ;;
  *)
    usage
    ;;
esac
shift 1
done

# Show configurations
echo "Kprobe stress test starting."
[ -f "$BLACKLIST" ] && echo "Blacklist: $BLACKLIST" || BLACKLIST=""
[ -f "$WHITELIST" ] && echo "Whitelist: $WHITELIST" || WHITELIST=""
[ -f "$SYMLIST" ] && echo "Symlist: $SYMLIST" || SYMLIST=""

function make_filter () {
local EXP=""
if [ -z "$WHITELIST" -a -z "$BLACKLIST" ]; then
  echo "s/^$//g"
else
  for i in `cat $WHITELIST $BLACKLIST` ;do
    [ -z "$EXP" ] && EXP="^$i\$" || EXP="$EXP\\|^$i\$"
  done ; EXP="s/$EXP//g"
  echo $EXP
fi
}

function list_allsyms () {
local sym
local out=0
for sym in `sort /proc/kallsyms | egrep '[0-9a-f]+ [Tt] [^[]*$' | cut -d\  -f 3`;do
  [ $sym  = "_stext" ] && out=1 && continue
  [ $sym  = "__kprobes_text_start" ] && out=0 && continue
  [ $sym  = "__kprobes_text_end" ] && out=1 && continue
  [ $sym  = "_etext" ] && break
  [ $out -eq 1 ] && echo $sym
done
}

function prep_testing () {
local i=0
local n=0
local NR=$1
local fname=

echo "Grouping symbols: $NR"

fname=`printf "list-%03d.%d" $i $NR`
cat $SYMFILE | while read ln; do
  [ -z "$ln" ] && continue
  echo "$ln" >> testing/$fname
  n=$((n+1))
  if [ $n -eq $NR ]; then
    n=0
    i=$((i+1))
    fname=`printf "list-%03d.%d" $i $NR`
  fi
done
sync
}

function init_first () {
local EXP
EXP=`make_filter`
if [ -f "$SYMLIST" ]; then
  cat $SYMLIST | sed $EXP > $SYMFILE
else
  echo -n "Generating symbol list from /proc/kallsyms..."
  list_allsyms | sed $EXP > $SYMFILE
  echo "done. " `wc -l $SYMFILE | cut -f1 -d\  ` "symbols listed."
fi
mkdir testing
mkdir failed
mkdir backup
mkdir unset
mkdir passed
mkdir passed/profiles
prep_testing $INITNR
}

function get_max_nr () {
wc -l failed/list-* unset/list-* 2>/dev/null |\
awk '/^ *[0-9]+ .*list.*$/{ if (nr < $1) nr=$1 } BEGIN { nr=0 } END { print nr}'
}

function init_next () {
local NR
NR=`get_max_nr`
[ $NR -eq 0 ] && return 1
[ $NR -eq 1 ] && return 2
[ $NR -le $DIV ] && NR=1 || NR=`expr $NR / $DIV`

cat failed/* unset/* > $SYMFILE
if [ $FAILBKUP ]; then
    cp failed/* unset/* backup/
fi
rm failed/* unset/*

prep_testing $NR
return 0
}


# Initialize symbols
if [ ! -d testing ]; then
  init_first
elif [ -z "`ls testing/`" ]; then
  init_next
fi

function set_probes () {
local s
for s in `cat $1`; do
  echo "p:$s" $s >> $DEBUGFS/tracing/kprobe_events
  [ $? -ne 0 ] && return 1
  echo 1 > $DEBUGFS/tracing/events/kprobes/$s/enable
done
return 0
}

function clear_probes () {
  echo > $DEBUGFS/tracing/kprobe_events
}

function save_profile () {
  cat $DEBUGFS/tracing/kprobe_profile > passed/profiles/$1.prof
}

clear_probes

echo "Starting tests.."

RET=0

while [ $RET -eq 0 ]; do

  for list in `cd testing/; ls`; do
  echo -n $list
  mv testing/$list failed/
  sync;sync
  echo -n "sync.."
  set_probes failed/$list
  if [ $? -ne 0 ]; then
    clear_probes
    sync;sync
    echo "can not set"
    mv failed/$list unset/
    sync;sync
  else
    do_test
    save_profile $list
    clear_probes
    sync;sync
    echo "done"
    mv failed/$list passed/
    sync;sync
  fi
  done

  init_next
  RET=$?
done

if [ $RET -eq 1 ];then
# No failed symbols
  echo "no failed symbols found."
else
  echo "found failed symbols:"
  cat failed/* | tee $FAILFILE
  rm failed/*
fi

cat unset/* > "unset.list"
rm unset/*

function profile_symbols () {
  local s h m
  rm -f tested.list missed.list untested.list
  cat passed/profiles/*.prof | while read s h m ;do
    if [ $h -ne 0 ]; then 
      echo $s >> tested.list
    elif [ $m -ne 0 ]; then
      echo $s >> missed.list
    else
      echo $s >> untested.list
    fi
  done
}

echo -n "Profiling symbols..."
profile_symbols
echo done
echo tested: `wc -l tested.list | cut -d\  -f1` symbols
echo missed: `wc -l missed.list | cut -d\  -f1` symbols
echo untested: `wc -l untested.list | cut -d\  -f1` symbols
echo unset: `wc -l unset.list | cut -d\  -f1` symbols


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [GIT PULL] tracing fixes
  2010-02-15  1:40 ` Masami Hiramatsu
@ 2010-02-15  4:50   ` Ingo Molnar
  2010-02-15 16:57     ` Masami Hiramatsu
  0 siblings, 1 reply; 53+ messages in thread
From: Ingo Molnar @ 2010-02-15  4:50 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Linus Torvalds, linux-kernel, Steven Rostedt,
	Fr??d??ric Weisbecker, Andrew Morton, Arnaldo Carvalho de Melo


* Masami Hiramatsu <mhiramat@redhat.com> wrote:

> Hi Ingo,
> 
> Could you push this commit too?
> 
> 5ecaafdbf44b1ba400b746c60c401d54c7ee0863
> kprobes: Add mcount to the kprobes blacklist
> 
> Since this bug can be easily reproduced with kprobe-tracer.

Ok, i've cherry-picked it into urgent.

I'm wondering, how complete is the kprobes blacklist? You can test it by 
putting a probe on every single function in the system via something like:

  for N in $(cut -d' ' -f3 /proc/kallsyms  | cut -f1); do
    perf probe $N
    perf probe -d $N
  done

(note: I havent tested it myself yet.)

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [GIT PULL] tracing fixes
  2010-02-14  9:09 Ingo Molnar
@ 2010-02-15  1:40 ` Masami Hiramatsu
  2010-02-15  4:50   ` Ingo Molnar
  0 siblings, 1 reply; 53+ messages in thread
From: Masami Hiramatsu @ 2010-02-15  1:40 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Steven Rostedt,
	Fr??d??ric Weisbecker, Andrew Morton

Hi Ingo,

Could you push this commit too?

5ecaafdbf44b1ba400b746c60c401d54c7ee0863
kprobes: Add mcount to the kprobes blacklist

Since this bug can be easily reproduced with kprobe-tracer.

Thank you,

Ingo Molnar wrote:
> Linus,
> 
> Please pull the latest tracing-fixes-for-linus git tree from:
> 
>    git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus
> 
>  Thanks,
> 
> 	Ingo
> 
> ------------------>
> Heiko Carstens (1):
>       tracing/kprobes: Fix probe parsing
> 
> Lai Jiangshan (1):
>       tracing: Fix circular dead lock in stack trace
> 

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2010-02-14  9:09 Ingo Molnar
  2010-02-15  1:40 ` Masami Hiramatsu
  0 siblings, 1 reply; 53+ messages in thread
From: Ingo Molnar @ 2010-02-14  9:09 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt, Fr??d??ric Weisbecker, Andrew Morton

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Heiko Carstens (1):
      tracing/kprobes: Fix probe parsing

Lai Jiangshan (1):
      tracing: Fix circular dead lock in stack trace


 kernel/trace/trace_kprobe.c |    2 +-
 kernel/trace/trace_stack.c  |   24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 6ea90c0..50b1b82 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -689,7 +689,7 @@ static int create_trace_probe(int argc, char **argv)
 			return -EINVAL;
 		}
 		/* an address specified */
-		ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
+		ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
 		if (ret) {
 			pr_info("Failed to parse address.\n");
 			return ret;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 678a512..f4bc9b2 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
 	unsigned long val, flags;
 	char buf[64];
 	int ret;
+	int cpu;
 
 	if (count >= sizeof(buf))
 		return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
 		return ret;
 
 	local_irq_save(flags);
+
+	/*
+	 * In case we trace inside arch_spin_lock() or after (NMI),
+	 * we will cause circular lock, so we also need to increase
+	 * the percpu trace_active here.
+	 */
+	cpu = smp_processor_id();
+	per_cpu(trace_active, cpu)++;
+
 	arch_spin_lock(&max_stack_lock);
 	*ptr = val;
 	arch_spin_unlock(&max_stack_lock);
+
+	per_cpu(trace_active, cpu)--;
 	local_irq_restore(flags);
 
 	return count;
@@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
+	int cpu;
+
 	local_irq_disable();
+
+	cpu = smp_processor_id();
+	per_cpu(trace_active, cpu)++;
+
 	arch_spin_lock(&max_stack_lock);
 
 	if (*pos == 0)
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 
 static void t_stop(struct seq_file *m, void *p)
 {
+	int cpu;
+
 	arch_spin_unlock(&max_stack_lock);
+
+	cpu = smp_processor_id();
+	per_cpu(trace_active, cpu)--;
+
 	local_irq_enable();
 }
 

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2010-01-31 17:23 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2010-01-31 17:23 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt,
	=?unknown-8bit?B?RnLDqWTDqXJpYw==?= Weisbecker, Andrew Morton

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Mike Frysinger (1):
      tracing/documentation: Cover new frame pointer semantics

Steven Rostedt (3):
      tracing: Prevent kernel oops with corrupted buffer
      ring-buffer: Check if ring buffer iterator has stale data
      ring-buffer: Check for end of page in iterator

Yang Hongyang (1):
      tracing/documentation: Fix a typo in ftrace.txt


 Documentation/trace/ftrace-design.txt |   26 +++++++++++++++++++++++---
 Documentation/trace/ftrace.txt        |    2 +-
 kernel/trace/Kconfig                  |    4 +---
 kernel/trace/ring_buffer.c            |   24 +++++++++++++++++++++---
 kernel/trace/trace.c                  |    5 +++++
 5 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
index 239f14b..6a5a579 100644
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -1,5 +1,6 @@
 		function tracer guts
 		====================
+		By Mike Frysinger
 
 Introduction
 ------------
@@ -173,14 +174,16 @@ void ftrace_graph_caller(void)
 
 	unsigned long *frompc = &...;
 	unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE;
-	prepare_ftrace_return(frompc, selfpc);
+	/* passing frame pointer up is optional -- see below */
+	prepare_ftrace_return(frompc, selfpc, frame_pointer);
 
 	/* restore all state needed by the ABI */
 }
 #endif
 
-For information on how to implement prepare_ftrace_return(), simply look at
-the x86 version.  The only architecture-specific piece in it is the setup of
+For information on how to implement prepare_ftrace_return(), simply look at the
+x86 version (the frame pointer passing is optional; see the next section for
+more information).  The only architecture-specific piece in it is the setup of
 the fault recovery table (the asm(...) code).  The rest should be the same
 across architectures.
 
@@ -205,6 +208,23 @@ void return_to_handler(void)
 #endif
 
 
+HAVE_FUNCTION_GRAPH_FP_TEST
+---------------------------
+
+An arch may pass in a unique value (frame pointer) to both the entering and
+exiting of a function.  On exit, the value is compared and if it does not
+match, then it will panic the kernel.  This is largely a sanity check for bad
+code generation with gcc.  If gcc for your port sanely updates the frame
+pointer under different opitmization levels, then ignore this option.
+
+However, adding support for it isn't terribly difficult.  In your assembly code
+that calls prepare_ftrace_return(), pass the frame pointer as the 3rd argument.
+Then in the C version of that function, do what the x86 port does and pass it
+along to ftrace_push_return_trace() instead of a stub value of 0.
+
+Similarly, when you call ftrace_return_to_handler(), pass it the frame pointer.
+
+
 HAVE_FTRACE_NMI_ENTER
 ---------------------
 
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 8179692..bab3040 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1625,7 +1625,7 @@ If I am only interested in sys_nanosleep and hrtimer_interrupt:
 
  # echo sys_nanosleep hrtimer_interrupt \
 		> set_ftrace_filter
- # echo ftrace > current_tracer
+ # echo function > current_tracer
  # echo 1 > tracing_enabled
  # usleep 1
  # echo 0 > tracing_enabled
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6c22d8a..60e2ce0 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -27,9 +27,7 @@ config HAVE_FUNCTION_GRAPH_TRACER
 config HAVE_FUNCTION_GRAPH_FP_TEST
 	bool
 	help
-	 An arch may pass in a unique value (frame pointer) to both the
-	 entering and exiting of a function. On exit, the value is compared
-	 and if it does not match, then it will panic the kernel.
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	bool
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index edefe3b..8c1b2d2 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -464,6 +464,8 @@ struct ring_buffer_iter {
 	struct ring_buffer_per_cpu	*cpu_buffer;
 	unsigned long			head;
 	struct buffer_page		*head_page;
+	struct buffer_page		*cache_reader_page;
+	unsigned long			cache_read;
 	u64				read_stamp;
 };
 
@@ -2716,6 +2718,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
 		iter->read_stamp = cpu_buffer->read_stamp;
 	else
 		iter->read_stamp = iter->head_page->page->time_stamp;
+	iter->cache_reader_page = cpu_buffer->reader_page;
+	iter->cache_read = cpu_buffer->read;
 }
 
 /**
@@ -3060,13 +3064,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	struct ring_buffer_event *event;
 	int nr_loops = 0;
 
-	if (ring_buffer_iter_empty(iter))
-		return NULL;
-
 	cpu_buffer = iter->cpu_buffer;
 	buffer = cpu_buffer->buffer;
 
+	/*
+	 * Check if someone performed a consuming read to
+	 * the buffer. A consuming read invalidates the iterator
+	 * and we need to reset the iterator in this case.
+	 */
+	if (unlikely(iter->cache_read != cpu_buffer->read ||
+		     iter->cache_reader_page != cpu_buffer->reader_page))
+		rb_iter_reset(iter);
+
  again:
+	if (ring_buffer_iter_empty(iter))
+		return NULL;
+
 	/*
 	 * We repeat when a timestamp is encountered.
 	 * We can get multiple timestamps by nested interrupts or also
@@ -3081,6 +3094,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	if (rb_per_cpu_empty(cpu_buffer))
 		return NULL;
 
+	if (iter->head >= local_read(&iter->head_page->page->commit)) {
+		rb_inc_iter(iter);
+		goto again;
+	}
+
 	event = rb_iter_head_event(iter);
 
 	switch (event->type_len) {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0df1b0f..eac6875 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -951,6 +951,11 @@ void trace_find_cmdline(int pid, char comm[])
 		return;
 	}
 
+	if (WARN_ON_ONCE(pid < 0)) {
+		strcpy(comm, "<XXX>");
+		return;
+	}
+
 	if (pid > PID_MAX_DEFAULT) {
 		strcpy(comm, "<...>");
 		return;

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-12-31 11:55 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-12-31 11:55 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt,
	=?unknown-8bit?B?RnLDqWTDqXJpYw==?= Weisbecker, Andrew Morton

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Lai Jiangshan (3):
      tracing/kprobe: Show sign of fields in trace_kprobe format files
      tracing/syscalls: Fix typo in SYSCALL_DEFINE0
      tracing: Fix sign fields in ftrace_define_fields_##call()

Li Zefan (4):
      ksym_tracer: Fix to make the tracer work
      ksym_tracer: Fix to allow writing newline to ksym_trace_filter
      ksym_tracer: Fix race when incrementing count
      ksym_tracer: Remove trace_stat

Randy Dunlap (4):
      Documentation: Update mmiotrace.txt
      Documentation: Update tracepoint-analysis.txt
      Documentation: Update ftrace-design.txt
      tracing: Kconfig spelling fixes and cleanups

Steven Rostedt (1):
      tracing: Fix setting tracer specific options


 Documentation/trace/ftrace-design.txt       |   14 ++--
 Documentation/trace/mmiotrace.txt           |   15 ++--
 Documentation/trace/tracepoint-analysis.txt |   60 ++++++------
 include/linux/syscalls.h                    |    2 +-
 include/trace/ftrace.h                      |    7 +-
 kernel/hw_breakpoint.c                      |   10 ++-
 kernel/trace/Kconfig                        |  112 +++++++++++-----------
 kernel/trace/trace.c                        |    2 +-
 kernel/trace/trace_export.c                 |    7 +-
 kernel/trace/trace_kprobe.c                 |    7 +-
 kernel/trace/trace_ksym.c                   |  140 +++++++++++----------------
 11 files changed, 179 insertions(+), 197 deletions(-)

diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
index 641a1ef..239f14b 100644
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -53,14 +53,14 @@ size of the mcount call that is embedded in the function).
 For example, if the function foo() calls bar(), when the bar() function calls
 mcount(), the arguments mcount() will pass to the tracer are:
 	"frompc" - the address bar() will use to return to foo()
-	"selfpc" - the address bar() (with _mcount() size adjustment)
+	"selfpc" - the address bar() (with mcount() size adjustment)
 
 Also keep in mind that this mcount function will be called *a lot*, so
 optimizing for the default case of no tracer will help the smooth running of
 your system when tracing is disabled.  So the start of the mcount function is
-typically the bare min with checking things before returning.  That also means
-the code flow should usually kept linear (i.e. no branching in the nop case).
-This is of course an optimization and not a hard requirement.
+typically the bare minimum with checking things before returning.  That also
+means the code flow should usually be kept linear (i.e. no branching in the nop
+case).  This is of course an optimization and not a hard requirement.
 
 Here is some pseudo code that should help (these functions should actually be
 implemented in assembly):
@@ -131,10 +131,10 @@ some functions to save (hijack) and restore the return address.
 
 The mcount function should check the function pointers ftrace_graph_return
 (compare to ftrace_stub) and ftrace_graph_entry (compare to
-ftrace_graph_entry_stub).  If either of those are not set to the relevant stub
+ftrace_graph_entry_stub).  If either of those is not set to the relevant stub
 function, call the arch-specific function ftrace_graph_caller which in turn
 calls the arch-specific function prepare_ftrace_return.  Neither of these
-function names are strictly required, but you should use them anyways to stay
+function names is strictly required, but you should use them anyway to stay
 consistent across the architecture ports -- easier to compare & contrast
 things.
 
@@ -144,7 +144,7 @@ but the first argument should be a pointer to the "frompc".  Typically this is
 located on the stack.  This allows the function to hijack the return address
 temporarily to have it point to the arch-specific function return_to_handler.
 That function will simply call the common ftrace_return_to_handler function and
-that will return the original return address with which, you can return to the
+that will return the original return address with which you can return to the
 original call site.
 
 Here is the updated mcount pseudo code:
diff --git a/Documentation/trace/mmiotrace.txt b/Documentation/trace/mmiotrace.txt
index 162effb..664e738 100644
--- a/Documentation/trace/mmiotrace.txt
+++ b/Documentation/trace/mmiotrace.txt
@@ -44,7 +44,8 @@ Check for lost events.
 Usage
 -----
 
-Make sure debugfs is mounted to /sys/kernel/debug. If not, (requires root privileges)
+Make sure debugfs is mounted to /sys/kernel/debug.
+If not (requires root privileges):
 $ mount -t debugfs debugfs /sys/kernel/debug
 
 Check that the driver you are about to trace is not loaded.
@@ -91,7 +92,7 @@ $ dmesg > dmesg.txt
 $ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt
 and then send the .tar.gz file. The trace compresses considerably. Replace
 "pciid" and "nick" with the PCI ID or model name of your piece of hardware
-under investigation and your nick name.
+under investigation and your nickname.
 
 
 How Mmiotrace Works
@@ -100,7 +101,7 @@ How Mmiotrace Works
 Access to hardware IO-memory is gained by mapping addresses from PCI bus by
 calling one of the ioremap_*() functions. Mmiotrace is hooked into the
 __ioremap() function and gets called whenever a mapping is created. Mapping is
-an event that is recorded into the trace log. Note, that ISA range mappings
+an event that is recorded into the trace log. Note that ISA range mappings
 are not caught, since the mapping always exists and is returned directly.
 
 MMIO accesses are recorded via page faults. Just before __ioremap() returns,
@@ -122,11 +123,11 @@ Trace Log Format
 ----------------
 
 The raw log is text and easily filtered with e.g. grep and awk. One record is
-one line in the log. A record starts with a keyword, followed by keyword
-dependant arguments. Arguments are separated by a space, or continue until the
+one line in the log. A record starts with a keyword, followed by keyword-
+dependent arguments. Arguments are separated by a space, or continue until the
 end of line. The format for version 20070824 is as follows:
 
-Explanation	Keyword	Space separated arguments
+Explanation	Keyword	Space-separated arguments
 ---------------------------------------------------------------------------
 
 read event	R	width, timestamp, map id, physical, value, PC, PID
@@ -136,7 +137,7 @@ iounmap event	UNMAP	timestamp, map id, PC, PID
 marker		MARK	timestamp, text
 version		VERSION	the string "20070824"
 info for reader	LSPCI	one line from lspci -v
-PCI address map	PCIDEV	space separated /proc/bus/pci/devices data
+PCI address map	PCIDEV	space-separated /proc/bus/pci/devices data
 unk. opcode	UNKNOWN	timestamp, map id, physical, data, PC, PID
 
 Timestamp is in seconds with decimals. Physical is a PCI bus address, virtual
diff --git a/Documentation/trace/tracepoint-analysis.txt b/Documentation/trace/tracepoint-analysis.txt
index 5eb4e48..87bee3c 100644
--- a/Documentation/trace/tracepoint-analysis.txt
+++ b/Documentation/trace/tracepoint-analysis.txt
@@ -10,8 +10,8 @@ Tracepoints (see Documentation/trace/tracepoints.txt) can be used without
 creating custom kernel modules to register probe functions using the event
 tracing infrastructure.
 
-Simplistically, tracepoints will represent an important event that when can
-be taken in conjunction with other tracepoints to build a "Big Picture" of
+Simplistically, tracepoints represent important events that can be
+taken in conjunction with other tracepoints to build a "Big Picture" of
 what is going on within the system. There are a large number of methods for
 gathering and interpreting these events. Lacking any current Best Practises,
 this document describes some of the methods that can be used.
@@ -33,12 +33,12 @@ calling
 
 will give a fair indication of the number of events available.
 
-2.2 PCL
+2.2 PCL (Performance Counters for Linux)
 -------
 
-Discovery and enumeration of all counters and events, including tracepoints
+Discovery and enumeration of all counters and events, including tracepoints,
 are available with the perf tool. Getting a list of available events is a
-simple case of
+simple case of:
 
   $ perf list 2>&1 | grep Tracepoint
   ext4:ext4_free_inode                     [Tracepoint event]
@@ -49,19 +49,19 @@ simple case of
   [ .... remaining output snipped .... ]
 
 
-2. Enabling Events
+3. Enabling Events
 ==================
 
-2.1 System-Wide Event Enabling
+3.1 System-Wide Event Enabling
 ------------------------------
 
 See Documentation/trace/events.txt for a proper description on how events
 can be enabled system-wide. A short example of enabling all events related
-to page allocation would look something like
+to page allocation would look something like:
 
   $ for i in `find /sys/kernel/debug/tracing/events -name "enable" | grep mm_`; do echo 1 > $i; done
 
-2.2 System-Wide Event Enabling with SystemTap
+3.2 System-Wide Event Enabling with SystemTap
 ---------------------------------------------
 
 In SystemTap, tracepoints are accessible using the kernel.trace() function
@@ -86,7 +86,7 @@ were allocating the pages.
           print_count()
   }
 
-2.3 System-Wide Event Enabling with PCL
+3.3 System-Wide Event Enabling with PCL
 ---------------------------------------
 
 By specifying the -a switch and analysing sleep, the system-wide events
@@ -107,16 +107,16 @@ for a duration of time can be examined.
 Similarly, one could execute a shell and exit it as desired to get a report
 at that point.
 
-2.4 Local Event Enabling
+3.4 Local Event Enabling
 ------------------------
 
 Documentation/trace/ftrace.txt describes how to enable events on a per-thread
 basis using set_ftrace_pid.
 
-2.5 Local Event Enablement with PCL
+3.5 Local Event Enablement with PCL
 -----------------------------------
 
-Events can be activate and tracked for the duration of a process on a local
+Events can be activated and tracked for the duration of a process on a local
 basis using PCL such as follows.
 
   $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
@@ -131,18 +131,18 @@ basis using PCL such as follows.
 
     0.973913387  seconds time elapsed
 
-3. Event Filtering
+4. Event Filtering
 ==================
 
 Documentation/trace/ftrace.txt covers in-depth how to filter events in
 ftrace.  Obviously using grep and awk of trace_pipe is an option as well
 as any script reading trace_pipe.
 
-4. Analysing Event Variances with PCL
+5. Analysing Event Variances with PCL
 =====================================
 
 Any workload can exhibit variances between runs and it can be important
-to know what the standard deviation in. By and large, this is left to the
+to know what the standard deviation is. By and large, this is left to the
 performance analyst to do it by hand. In the event that the discrete event
 occurrences are useful to the performance analyst, then perf can be used.
 
@@ -166,7 +166,7 @@ In the event that some higher-level event is required that depends on some
 aggregation of discrete events, then a script would need to be developed.
 
 Using --repeat, it is also possible to view how events are fluctuating over
-time on a system wide basis using -a and sleep.
+time on a system-wide basis using -a and sleep.
 
   $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
 		-e kmem:mm_pagevec_free \
@@ -180,7 +180,7 @@ time on a system wide basis using -a and sleep.
 
     1.002251757  seconds time elapsed   ( +-   0.005% )
 
-5. Higher-Level Analysis with Helper Scripts
+6. Higher-Level Analysis with Helper Scripts
 ============================================
 
 When events are enabled the events that are triggering can be read from
@@ -190,11 +190,11 @@ be gathered on-line as appropriate. Examples of post-processing might include
 
   o Reading information from /proc for the PID that triggered the event
   o Deriving a higher-level event from a series of lower-level events.
-  o Calculate latencies between two events
+  o Calculating latencies between two events
 
 Documentation/trace/postprocess/trace-pagealloc-postprocess.pl is an example
 script that can read trace_pipe from STDIN or a copy of a trace. When used
-on-line, it can be interrupted once to generate a report without existing
+on-line, it can be interrupted once to generate a report without exiting
 and twice to exit.
 
 Simplistically, the script just reads STDIN and counts up events but it
@@ -212,12 +212,12 @@ also can do more such as
     processes, the parent process responsible for creating all the helpers
     can be identified
 
-6. Lower-Level Analysis with PCL
+7. Lower-Level Analysis with PCL
 ================================
 
-There may also be a requirement to identify what functions with a program
+There may also be a requirement to identify what functions within a program
 were generating events within the kernel. To begin this sort of analysis, the
-data must be recorded. At the time of writing, this required root
+data must be recorded. At the time of writing, this required root:
 
   $ perf record -c 1 \
 	-e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
@@ -253,11 +253,11 @@ perf report.
   # (For more details, try: perf report --sort comm,dso,symbol)
   #
 
-According to this, the vast majority of events occured triggered on events
-within the VDSO. With simple binaries, this will often be the case so lets
+According to this, the vast majority of events triggered on events
+within the VDSO. With simple binaries, this will often be the case so let's
 take a slightly different example. In the course of writing this, it was
-noticed that X was generating an insane amount of page allocations so lets look
-at it
+noticed that X was generating an insane amount of page allocations so let's look
+at it:
 
   $ perf record -c 1 -f \
 		-e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
@@ -280,8 +280,8 @@ This was interrupted after a few seconds and
   # (For more details, try: perf report --sort comm,dso,symbol)
   #
 
-So, almost half of the events are occuring in a library. To get an idea which
-symbol.
+So, almost half of the events are occurring in a library. To get an idea which
+symbol:
 
   $ perf report --sort comm,dso,symbol
   # Samples: 27666
@@ -297,7 +297,7 @@ symbol.
        0.01%     Xorg  /opt/gfx-test/lib/libpixman-1.so.0.13.1  [.] get_fast_path
        0.00%     Xorg  [kernel]                                 [k] ftrace_trace_userstack
 
-To see where within the function pixmanFillsse2 things are going wrong
+To see where within the function pixmanFillsse2 things are going wrong:
 
   $ perf annotate pixmanFillsse2
   [ ... ]
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 65793e9..207466a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -195,7 +195,7 @@ struct perf_event_attr;
 	static const struct syscall_metadata __used		\
 	  __attribute__((__aligned__(4)))			\
 	  __attribute__((section("__syscalls_metadata")))	\
-	  __syscall_meta_##sname = {				\
+	  __syscall_meta__##sname = {				\
 		.name 		= "sys_"#sname,			\
 		.nb_args 	= 0,				\
 		.enter_event	= &event_enter__##sname,	\
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 7352315..c6fe03e 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -414,7 +414,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
 	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
 				 offsetof(typeof(field), item),		\
-				 sizeof(field.item), 0, FILTER_OTHER);	\
+				 sizeof(field.item),			\
+				 is_signed_type(type), FILTER_OTHER);	\
 	if (ret)							\
 		return ret;
 
@@ -422,8 +423,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 #define __dynamic_array(type, item, len)				       \
 	ret = trace_define_field(event_call, "__data_loc " #type "[]", #item,  \
 				 offsetof(typeof(field), __data_loc_##item),   \
-				 sizeof(field.__data_loc_##item), 0,	       \
-				 FILTER_OTHER);
+				 sizeof(field.__data_loc_##item),	       \
+				 is_signed_type(type), FILTER_OTHER);
 
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 366eedf..48fb0bb 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -40,6 +40,7 @@
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/init.h>
+#include <linux/cpu.h>
 #include <linux/smp.h>
 
 #include <linux/hw_breakpoint.h>
@@ -388,7 +389,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
 	if (!cpu_events)
 		return ERR_PTR(-ENOMEM);
 
-	for_each_possible_cpu(cpu) {
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
 		pevent = per_cpu_ptr(cpu_events, cpu);
 		bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);
 
@@ -399,18 +401,20 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
 			goto fail;
 		}
 	}
+	put_online_cpus();
 
 	return cpu_events;
 
 fail:
-	for_each_possible_cpu(cpu) {
+	for_each_online_cpu(cpu) {
 		pevent = per_cpu_ptr(cpu_events, cpu);
 		if (IS_ERR(*pevent))
 			break;
 		unregister_hw_breakpoint(*pevent);
 	}
+	put_online_cpus();
+
 	free_percpu(cpu_events);
-	/* return the error if any */
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d006554..6c22d8a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -12,17 +12,17 @@ config NOP_TRACER
 config HAVE_FTRACE_NMI_ENTER
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_FUNCTION_TRACER
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_FUNCTION_GRAPH_TRACER
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_FUNCTION_GRAPH_FP_TEST
 	bool
@@ -34,17 +34,17 @@ config HAVE_FUNCTION_GRAPH_FP_TEST
 config HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_DYNAMIC_FTRACE
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_FTRACE_MCOUNT_RECORD
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_HW_BRANCH_TRACER
 	bool
@@ -52,7 +52,7 @@ config HAVE_HW_BRANCH_TRACER
 config HAVE_SYSCALL_TRACEPOINTS
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config TRACER_MAX_TRACE
 	bool
@@ -83,7 +83,7 @@ config RING_BUFFER_ALLOW_SWAP
 # This allows those options to appear when no other tracer is selected. But the
 # options do not appear when something else selects it. We need the two options
 # GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
-# hidding of the automatic options.
+# hiding of the automatic options.
 
 config TRACING
 	bool
@@ -119,7 +119,7 @@ menuconfig FTRACE
 	bool "Tracers"
 	default y if DEBUG_KERNEL
 	help
-	 Enable the kernel tracing infrastructure.
+	  Enable the kernel tracing infrastructure.
 
 if FTRACE
 
@@ -133,7 +133,7 @@ config FUNCTION_TRACER
 	help
 	  Enable the kernel to trace every kernel function. This is done
 	  by using a compiler feature to insert a small, 5-byte No-Operation
-	  instruction to the beginning of every kernel function, which NOP
+	  instruction at the beginning of every kernel function, which NOP
 	  sequence is then dynamically patched into a tracer call when
 	  tracing is enabled by the administrator. If it's runtime disabled
 	  (the bootup default), then the overhead of the instructions is very
@@ -150,7 +150,7 @@ config FUNCTION_GRAPH_TRACER
 	  and its entry.
 	  Its first purpose is to trace the duration of functions and
 	  draw a call graph for each thread with some information like
-	  the return value. This is done by setting the current return 
+	  the return value. This is done by setting the current return
 	  address on the current task structure into a stack of calls.
 
 
@@ -173,7 +173,7 @@ config IRQSOFF_TRACER
 
 	      echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 
-	  (Note that kernel size and overhead increases with this option
+	  (Note that kernel size and overhead increase with this option
 	  enabled. This option and the preempt-off timing option can be
 	  used together or separately.)
 
@@ -186,7 +186,7 @@ config PREEMPT_TRACER
 	select TRACER_MAX_TRACE
 	select RING_BUFFER_ALLOW_SWAP
 	help
-	  This option measures the time spent in preemption off critical
+	  This option measures the time spent in preemption-off critical
 	  sections, with microsecond accuracy.
 
 	  The default measurement method is a maximum search, which is
@@ -195,7 +195,7 @@ config PREEMPT_TRACER
 
 	      echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 
-	  (Note that kernel size and overhead increases with this option
+	  (Note that kernel size and overhead increase with this option
 	  enabled. This option and the irqs-off timing option can be
 	  used together or separately.)
 
@@ -222,7 +222,7 @@ config ENABLE_DEFAULT_TRACERS
 	depends on !GENERIC_TRACER
 	select TRACING
 	help
-	  This tracer hooks to various trace points in the kernel
+	  This tracer hooks to various trace points in the kernel,
 	  allowing the user to pick and choose which trace point they
 	  want to trace. It also includes the sched_switch tracer plugin.
 
@@ -265,19 +265,19 @@ choice
 	 The likely/unlikely profiler only looks at the conditions that
 	 are annotated with a likely or unlikely macro.
 
-	 The "all branch" profiler will profile every if statement in the
+	 The "all branch" profiler will profile every if-statement in the
 	 kernel. This profiler will also enable the likely/unlikely
-	 profiler as well.
+	 profiler.
 
-	 Either of the above profilers add a bit of overhead to the system.
-	 If unsure choose "No branch profiling".
+	 Either of the above profilers adds a bit of overhead to the system.
+	 If unsure, choose "No branch profiling".
 
 config BRANCH_PROFILE_NONE
 	bool "No branch profiling"
 	help
-	 No branch profiling. Branch profiling adds a bit of overhead.
-	 Only enable it if you want to analyse the branching behavior.
-	 Otherwise keep it disabled.
+	  No branch profiling. Branch profiling adds a bit of overhead.
+	  Only enable it if you want to analyse the branching behavior.
+	  Otherwise keep it disabled.
 
 config PROFILE_ANNOTATED_BRANCHES
 	bool "Trace likely/unlikely profiler"
@@ -288,7 +288,7 @@ config PROFILE_ANNOTATED_BRANCHES
 
 	  /sys/kernel/debug/tracing/profile_annotated_branch
 
-	  Note: this will add a significant overhead, only turn this
+	  Note: this will add a significant overhead; only turn this
 	  on if you need to profile the system's use of these macros.
 
 config PROFILE_ALL_BRANCHES
@@ -305,7 +305,7 @@ config PROFILE_ALL_BRANCHES
 
 	  This configuration, when enabled, will impose a great overhead
 	  on the system. This should only be enabled when the system
-	  is to be analyzed
+	  is to be analyzed in much detail.
 endchoice
 
 config TRACING_BRANCHES
@@ -335,7 +335,7 @@ config POWER_TRACER
 	depends on X86
 	select GENERIC_TRACER
 	help
-	  This tracer helps developers to analyze and optimize the kernels
+	  This tracer helps developers to analyze and optimize the kernel's
 	  power management decisions, specifically the C-state and P-state
 	  behavior.
 
@@ -391,14 +391,14 @@ config HW_BRANCH_TRACER
 	select GENERIC_TRACER
 	help
 	  This tracer records all branches on the system in a circular
-	  buffer giving access to the last N branches for each cpu.
+	  buffer, giving access to the last N branches for each cpu.
 
 config KMEMTRACE
 	bool "Trace SLAB allocations"
 	select GENERIC_TRACER
 	help
 	  kmemtrace provides tracing for slab allocator functions, such as
-	  kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
+	  kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
 	  data is then fed to the userspace application in order to analyse
 	  allocation hotspots, internal fragmentation and so on, making it
 	  possible to see how well an allocator performs, as well as debug
@@ -417,15 +417,15 @@ config WORKQUEUE_TRACER
 	bool "Trace workqueues"
 	select GENERIC_TRACER
 	help
-	  The workqueue tracer provides some statistical informations
+	  The workqueue tracer provides some statistical information
           about each cpu workqueue thread such as the number of the
           works inserted and executed since their creation. It can help
-          to evaluate the amount of work each of them have to perform.
+          to evaluate the amount of work each of them has to perform.
           For example it can help a developer to decide whether he should
-          choose a per cpu workqueue instead of a singlethreaded one.
+          choose a per-cpu workqueue instead of a singlethreaded one.
 
 config BLK_DEV_IO_TRACE
-	bool "Support for tracing block io actions"
+	bool "Support for tracing block IO actions"
 	depends on SYSFS
 	depends on BLOCK
 	select RELAY
@@ -456,15 +456,15 @@ config KPROBE_EVENT
 	select TRACING
 	default y
 	help
-	  This allows the user to add tracing events (similar to tracepoints) on the fly
-	  via the ftrace interface. See Documentation/trace/kprobetrace.txt
-	  for more details.
+	  This allows the user to add tracing events (similar to tracepoints)
+	  on the fly via the ftrace interface. See
+	  Documentation/trace/kprobetrace.txt for more details.
 
 	  Those events can be inserted wherever kprobes can probe, and record
 	  various register and memory values.
 
-	  This option is also required by perf-probe subcommand of perf tools. If
-	  you want to use perf tools, this option is strongly recommended.
+	  This option is also required by perf-probe subcommand of perf tools.
+	  If you want to use perf tools, this option is strongly recommended.
 
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
@@ -472,32 +472,32 @@ config DYNAMIC_FTRACE
 	depends on HAVE_DYNAMIC_FTRACE
 	default y
 	help
-         This option will modify all the calls to ftrace dynamically
-	 (will patch them out of the binary image and replaces them
-	 with a No-Op instruction) as they are called. A table is
-	 created to dynamically enable them again.
+          This option will modify all the calls to ftrace dynamically
+	  (will patch them out of the binary image and replace them
+	  with a No-Op instruction) as they are called. A table is
+	  created to dynamically enable them again.
 
-	 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
-	 has native performance as long as no tracing is active.
+	  This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
+	  otherwise has native performance as long as no tracing is active.
 
-	 The changes to the code are done by a kernel thread that
-	 wakes up once a second and checks to see if any ftrace calls
-	 were made. If so, it runs stop_machine (stops all CPUS)
-	 and modifies the code to jump over the call to ftrace.
+	  The changes to the code are done by a kernel thread that
+	  wakes up once a second and checks to see if any ftrace calls
+	  were made. If so, it runs stop_machine (stops all CPUS)
+	  and modifies the code to jump over the call to ftrace.
 
 config FUNCTION_PROFILER
 	bool "Kernel function profiler"
 	depends on FUNCTION_TRACER
 	default n
 	help
-	 This option enables the kernel function profiler. A file is created
-	 in debugfs called function_profile_enabled which defaults to zero.
-	 When a 1 is echoed into this file profiling begins, and when a
-	 zero is entered, profiling stops. A file in the trace_stats
-	 directory called functions, that show the list of functions that
-	 have been hit and their counters.
+	  This option enables the kernel function profiler. A file is created
+	  in debugfs called function_profile_enabled which defaults to zero.
+	  When a 1 is echoed into this file profiling begins, and when a
+	  zero is entered, profiling stops. A "functions" file is created in
+	  the trace_stats directory; this file shows the list of functions that
+	  have been hit and their counters.
 
-	 If in doubt, say N
+	  If in doubt, say N.
 
 config FTRACE_MCOUNT_RECORD
 	def_bool y
@@ -556,8 +556,8 @@ config RING_BUFFER_BENCHMARK
 	tristate "Ring buffer benchmark stress tester"
 	depends on RING_BUFFER
 	help
-	  This option creates a test to stress the ring buffer and bench mark it.
-	  It creates its own ring buffer such that it will not interfer with
+	  This option creates a test to stress the ring buffer and benchmark it.
+	  It creates its own ring buffer such that it will not interfere with
 	  any other users of the ring buffer (such as ftrace). It then creates
 	  a producer and consumer that will run for 10 seconds and sleep for
 	  10 seconds. Each interval it will print out the number of events
@@ -566,7 +566,7 @@ config RING_BUFFER_BENCHMARK
 	  It does not disable interrupts or raise its priority, so it may be
 	  affected by processes that are running.
 
-	  If unsure, say N
+	  If unsure, say N.
 
 endif # FTRACE
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ee61915..d0a4c12 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3949,7 +3949,7 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	if (!!(topt->flags->val & topt->opt->bit) != val) {
 		mutex_lock(&trace_types_lock);
 		ret = __set_tracer_option(current_trace, topt->flags,
-					  topt->opt, val);
+					  topt->opt, !val);
 		mutex_unlock(&trace_types_lock);
 		if (ret)
 			return ret;
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 458e5bf..d4fa5dc 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -158,7 +158,8 @@ ftrace_format_##name(struct ftrace_event_call *unused,			\
 	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
 	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
 				 offsetof(typeof(field), item),		\
-				 sizeof(field.item), 0, FILTER_OTHER);	\
+				 sizeof(field.item),			\
+				 is_signed_type(type), FILTER_OTHER);	\
 	if (ret)							\
 		return ret;
 
@@ -168,8 +169,8 @@ ftrace_format_##name(struct ftrace_event_call *unused,			\
 	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
 				 offsetof(typeof(field),		\
 					  container.item),		\
-				 sizeof(field.container.item), 0,	\
-				 FILTER_OTHER);				\
+				 sizeof(field.container.item),		\
+				 is_signed_type(type), FILTER_OTHER);	\
 	if (ret)							\
 		return ret;
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 7ecab06..83f1e6e 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1182,10 +1182,11 @@ static int __probe_event_show_format(struct trace_seq *s,
 #undef SHOW_FIELD
 #define SHOW_FIELD(type, item, name)					\
 	do {								\
-		ret = trace_seq_printf(s, "\tfield: " #type " %s;\t"	\
-				"offset:%u;\tsize:%u;\n", name,		\
+		ret = trace_seq_printf(s, "\tfield:" #type " %s;\t"	\
+				"offset:%u;\tsize:%u;\tsigned:%d;\n", name,\
 				(unsigned int)offsetof(typeof(field), item),\
-				(unsigned int)sizeof(type));		\
+				(unsigned int)sizeof(type),		\
+				is_signed_type(type));			\
 		if (!ret)						\
 			return 0;					\
 	} while (0)
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index faf37fa..94103cd 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -26,12 +26,13 @@
 #include <linux/fs.h>
 
 #include "trace_output.h"
-#include "trace_stat.h"
 #include "trace.h"
 
 #include <linux/hw_breakpoint.h>
 #include <asm/hw_breakpoint.h>
 
+#include <asm/atomic.h>
+
 /*
  * For now, let us restrict the no. of symbols traced simultaneously to number
  * of available hardware breakpoint registers.
@@ -44,7 +45,7 @@ struct trace_ksym {
 	struct perf_event	**ksym_hbp;
 	struct perf_event_attr	attr;
 #ifdef CONFIG_PROFILE_KSYM_TRACER
-	unsigned long		counter;
+	atomic64_t		counter;
 #endif
 	struct hlist_node	ksym_hlist;
 };
@@ -69,9 +70,8 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
 
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
-		if ((entry->attr.bp_addr == hbp_hit_addr) &&
-		    (entry->counter <= MAX_UL_INT)) {
-			entry->counter++;
+		if (entry->attr.bp_addr == hbp_hit_addr) {
+			atomic64_inc(&entry->counter);
 			break;
 		}
 	}
@@ -197,7 +197,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
 	entry->attr.bp_addr = addr;
 	entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
 
-	ret = -EAGAIN;
 	entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
 					ksym_hbp_handler);
 
@@ -300,8 +299,8 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 	 * 2: echo 0 > ksym_trace_filter
 	 * 3: echo "*:---" > ksym_trace_filter
 	 */
-	if (!buf[0] || !strcmp(buf, "0") ||
-	    !strcmp(buf, "*:---")) {
+	if (!input_string[0] || !strcmp(input_string, "0") ||
+	    !strcmp(input_string, "*:---")) {
 		__ksym_trace_reset();
 		ret = 0;
 		goto out;
@@ -444,102 +443,77 @@ struct tracer ksym_tracer __read_mostly =
 	.print_line	= ksym_trace_output
 };
 
-__init static int init_ksym_trace(void)
-{
-	struct dentry *d_tracer;
-	struct dentry *entry;
-
-	d_tracer = tracing_init_dentry();
-	ksym_filter_entry_count = 0;
-
-	entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
-				    NULL, &ksym_tracing_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'ksym_trace_filter' file\n");
-
-	return register_tracer(&ksym_tracer);
-}
-device_initcall(init_ksym_trace);
-
-
 #ifdef CONFIG_PROFILE_KSYM_TRACER
-static int ksym_tracer_stat_headers(struct seq_file *m)
+static int ksym_profile_show(struct seq_file *m, void *v)
 {
+	struct hlist_node *node;
+	struct trace_ksym *entry;
+	int access_type = 0;
+	char fn_name[KSYM_NAME_LEN];
+
 	seq_puts(m, "  Access Type ");
 	seq_puts(m, "  Symbol                                       Counter\n");
 	seq_puts(m, "  ----------- ");
 	seq_puts(m, "  ------                                       -------\n");
-	return 0;
-}
 
-static int ksym_tracer_stat_show(struct seq_file *m, void *v)
-{
-	struct hlist_node *stat = v;
-	struct trace_ksym *entry;
-	int access_type = 0;
-	char fn_name[KSYM_NAME_LEN];
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
 
-	entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
+		access_type = entry->attr.bp_type;
 
-	access_type = entry->attr.bp_type;
+		switch (access_type) {
+		case HW_BREAKPOINT_R:
+			seq_puts(m, "  R           ");
+			break;
+		case HW_BREAKPOINT_W:
+			seq_puts(m, "  W           ");
+			break;
+		case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
+			seq_puts(m, "  RW          ");
+			break;
+		default:
+			seq_puts(m, "  NA          ");
+		}
 
-	switch (access_type) {
-	case HW_BREAKPOINT_R:
-		seq_puts(m, "  R           ");
-		break;
-	case HW_BREAKPOINT_W:
-		seq_puts(m, "  W           ");
-		break;
-	case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
-		seq_puts(m, "  RW          ");
-		break;
-	default:
-		seq_puts(m, "  NA          ");
+		if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
+			seq_printf(m, "  %-36s", fn_name);
+		else
+			seq_printf(m, "  %-36s", "<NA>");
+		seq_printf(m, " %15llu\n",
+			   (unsigned long long)atomic64_read(&entry->counter));
 	}
-
-	if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
-		seq_printf(m, "  %-36s", fn_name);
-	else
-		seq_printf(m, "  %-36s", "<NA>");
-	seq_printf(m, " %15lu\n", entry->counter);
+	rcu_read_unlock();
 
 	return 0;
 }
 
-static void *ksym_tracer_stat_start(struct tracer_stat *trace)
+static int ksym_profile_open(struct inode *node, struct file *file)
 {
-	return ksym_filter_head.first;
-}
-
-static void *
-ksym_tracer_stat_next(void *v, int idx)
-{
-	struct hlist_node *stat = v;
-
-	return stat->next;
+	return single_open(file, ksym_profile_show, NULL);
 }
 
-static struct tracer_stat ksym_tracer_stats = {
-	.name = "ksym_tracer",
-	.stat_start = ksym_tracer_stat_start,
-	.stat_next = ksym_tracer_stat_next,
-	.stat_headers = ksym_tracer_stat_headers,
-	.stat_show = ksym_tracer_stat_show
+static const struct file_operations ksym_profile_fops = {
+	.open		= ksym_profile_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
 };
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
 
-__init static int ksym_tracer_stat_init(void)
+__init static int init_ksym_trace(void)
 {
-	int ret;
+	struct dentry *d_tracer;
 
-	ret = register_stat_tracer(&ksym_tracer_stats);
-	if (ret) {
-		printk(KERN_WARNING "Warning: could not register "
-				    "ksym tracer stats\n");
-		return 1;
-	}
+	d_tracer = tracing_init_dentry();
 
-	return 0;
+	trace_create_file("ksym_trace_filter", 0644, d_tracer,
+			  NULL, &ksym_tracing_fops);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+	trace_create_file("ksym_profile", 0444, d_tracer,
+			  NULL, &ksym_profile_fops);
+#endif
+
+	return register_tracer(&ksym_tracer);
 }
-fs_initcall(ksym_tracer_stat_init);
-#endif /* CONFIG_PROFILE_KSYM_TRACER */
+device_initcall(init_ksym_trace);

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-12-15 20:31 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-12-15 20:31 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt,
	=?unknown-8bit?B?RnLDqWTDqXJpYw==?= Weisbecker, Andrew Morton

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Joe Perches (1):
      ftrace.h: Use common pr_info fmt string

Li Zefan (13):
      tracing: Extract duplicate ftrace_raw_init_event_foo()
      tracing: Pull up calls to trace_define_common_fields()
      tracing: Move a printk out of ftrace_raw_reg_event_foo()
      ftrace: Return EINVAL when writing invalid val to set_ftrace_filter
      ftrace: Call trace_parser_clear() properly
      function-graph: Allow writing the same val to set_graph_function
      tracing: Use seq file for trace_options
      tracing: Use seq file for trace_clock
      tracing: Remove useless trace option
      tracing: Simplify trace_option_write()
      tracing: Change event->profile_count to be int type
      tracing/power: Remove two exports
      ksym_tracer: Fix bad cast

Steven Rostedt (5):
      ring-buffer: Use sync sched protection on ring buffer resizing
      ring-buffer: Move resize integrity check under reader lock
      tracing: Add trace_dump_stack()
      tracing: Add stack trace to irqsoff tracer
      tracing: Fix return of trace_dump_stack()

Thomas Gleixner (1):
      tracing: Fix wrong usage of strstrip in trace_ksyms


 include/linux/ftrace_event.h       |    4 +-
 include/linux/kernel.h             |    3 +
 include/linux/syscalls.h           |    6 +-
 include/trace/ftrace.h             |   56 +----------
 kernel/trace/ftrace.c              |   30 ++++--
 kernel/trace/power-traces.c        |    2 -
 kernel/trace/ring_buffer.c         |   29 ++----
 kernel/trace/trace.c               |  182 +++++++++++++++---------------------
 kernel/trace/trace.h               |   23 ++---
 kernel/trace/trace_event_profile.c |    6 +-
 kernel/trace/trace_events.c        |   41 +++++++--
 kernel/trace/trace_export.c        |    4 -
 kernel/trace/trace_irqsoff.c       |    2 +
 kernel/trace/trace_kprobe.c        |    9 --
 kernel/trace/trace_ksym.c          |   56 +++++------
 kernel/trace/trace_syscalls.c      |   18 +---
 16 files changed, 196 insertions(+), 275 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 38f8d65..2233c98 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -131,7 +131,7 @@ struct ftrace_event_call {
 	void			*mod;
 	void			*data;
 
-	atomic_t		profile_count;
+	int			profile_count;
 	int			(*profile_enable)(struct ftrace_event_call *);
 	void			(*profile_disable)(struct ftrace_event_call *);
 };
@@ -158,7 +158,7 @@ enum {
 	FILTER_PTR_STRING,
 };
 
-extern int trace_define_common_fields(struct ftrace_event_call *call);
+extern int trace_event_raw_init(struct ftrace_event_call *call);
 extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 			      const char *name, int offset, int size,
 			      int is_signed, int filter_type);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3fa4c59..f1dc752 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -492,6 +492,8 @@ extern int
 __trace_printk(unsigned long ip, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
 
+extern void trace_dump_stack(void);
+
 /*
  * The double __builtin_constant_p is because gcc will give us an error
  * if we try to allocate the static variable to fmt if it is not a
@@ -525,6 +527,7 @@ trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
 static inline void tracing_start(void) { }
 static inline void tracing_stop(void) { }
 static inline void ftrace_off_permanent(void) { }
+static inline void trace_dump_stack(void) { }
 static inline int
 trace_printk(const char *fmt, ...)
 {
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 939a615..65793e9 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -102,12 +102,10 @@ struct perf_event_attr;
 #ifdef CONFIG_EVENT_PROFILE
 
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)				       \
-	.profile_count = ATOMIC_INIT(-1),				       \
 	.profile_enable = prof_sysenter_enable,				       \
 	.profile_disable = prof_sysenter_disable,
 
 #define TRACE_SYS_EXIT_PROFILE_INIT(sname)				       \
-	.profile_count = ATOMIC_INIT(-1),				       \
 	.profile_enable = prof_sysexit_enable,				       \
 	.profile_disable = prof_sysexit_disable,
 #else
@@ -145,7 +143,7 @@ struct perf_event_attr;
 		.name                   = "sys_enter"#sname,		\
 		.system                 = "syscalls",			\
 		.event                  = &enter_syscall_print_##sname,	\
-		.raw_init		= init_syscall_trace,		\
+		.raw_init		= trace_event_raw_init,		\
 		.show_format		= syscall_enter_format,		\
 		.define_fields		= syscall_enter_define_fields,	\
 		.regfunc		= reg_event_syscall_enter,	\
@@ -167,7 +165,7 @@ struct perf_event_attr;
 		.name                   = "sys_exit"#sname,		\
 		.system                 = "syscalls",			\
 		.event                  = &exit_syscall_print_##sname,	\
-		.raw_init		= init_syscall_trace,		\
+		.raw_init		= trace_event_raw_init,		\
 		.show_format		= syscall_exit_format,		\
 		.define_fields		= syscall_exit_define_fields,	\
 		.regfunc		= reg_event_syscall_exit,	\
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index d1b3de9..7352315 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -436,10 +436,6 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 	struct ftrace_raw_##call field;					\
 	int ret;							\
 									\
-	ret = trace_define_common_fields(event_call);			\
-	if (ret)							\
-		return ret;						\
-									\
 	tstruct;							\
 									\
 	return ret;							\
@@ -559,13 +555,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
  *
  * static int ftrace_reg_event_<call>(struct ftrace_event_call *unused)
  * {
- *	int ret;
- *
- *	ret = register_trace_<call>(ftrace_event_<call>);
- *	if (!ret)
- *		pr_info("event trace: Could not activate trace point "
- *			"probe to  <call>");
- *	return ret;
+ *	return register_trace_<call>(ftrace_event_<call>);
  * }
  *
  * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
@@ -623,23 +613,12 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
  *	.trace			= ftrace_raw_output_<call>, <-- stage 2
  * };
  *
- * static int ftrace_raw_init_event_<call>(struct ftrace_event_call *unused)
- * {
- *	int id;
- *
- *	id = register_ftrace_event(&ftrace_event_type_<call>);
- *	if (!id)
- *		return -ENODEV;
- *	event_<call>.id = id;
- *	return 0;
- * }
- *
  * static struct ftrace_event_call __used
  * __attribute__((__aligned__(4)))
  * __attribute__((section("_ftrace_events"))) event_<call> = {
  *	.name			= "<call>",
  *	.system			= "<system>",
- *	.raw_init		= ftrace_raw_init_event_<call>,
+ *	.raw_init		= trace_event_raw_init,
  *	.regfunc		= ftrace_reg_event_<call>,
  *	.unregfunc		= ftrace_unreg_event_<call>,
  *	.show_format		= ftrace_format_<call>,
@@ -647,13 +626,9 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
  *
  */
 
-#undef TP_FMT
-#define TP_FMT(fmt, args...)	fmt "\n", ##args
-
 #ifdef CONFIG_EVENT_PROFILE
 
 #define _TRACE_PROFILE_INIT(call)					\
-	.profile_count = ATOMIC_INIT(-1),				\
 	.profile_enable = ftrace_profile_enable_##call,			\
 	.profile_disable = ftrace_profile_disable_##call,
 
@@ -728,13 +703,7 @@ static void ftrace_raw_event_##call(proto)				\
 									\
 static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\
 {									\
-	int ret;							\
-									\
-	ret = register_trace_##call(ftrace_raw_event_##call);		\
-	if (ret)							\
-		pr_info("event trace: Could not activate trace point "	\
-			"probe to " #call "\n");			\
-	return ret;							\
+	return register_trace_##call(ftrace_raw_event_##call);		\
 }									\
 									\
 static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\
@@ -744,19 +713,7 @@ static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\
 									\
 static struct trace_event ftrace_event_type_##call = {			\
 	.trace			= ftrace_raw_output_##call,		\
-};									\
-									\
-static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\
-{									\
-	int id;								\
-									\
-	id = register_ftrace_event(&ftrace_event_type_##call);		\
-	if (!id)							\
-		return -ENODEV;						\
-	event_##call.id = id;						\
-	INIT_LIST_HEAD(&event_##call.fields);				\
-	return 0;							\
-}
+};
 
 #undef DEFINE_EVENT_PRINT
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
@@ -776,7 +733,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.name			= #call,				\
 	.system			= __stringify(TRACE_SYSTEM),		\
 	.event			= &ftrace_event_type_##call,		\
-	.raw_init		= ftrace_raw_init_event_##call,		\
+	.raw_init		= trace_event_raw_init,			\
 	.regfunc		= ftrace_raw_reg_event_##call,		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
 	.show_format		= ftrace_format_##template,		\
@@ -793,7 +750,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.name			= #call,				\
 	.system			= __stringify(TRACE_SYSTEM),		\
 	.event			= &ftrace_event_type_##call,		\
-	.raw_init		= ftrace_raw_init_event_##call,		\
+	.raw_init		= trace_event_raw_init,			\
 	.regfunc		= ftrace_raw_reg_event_##call,		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
 	.show_format		= ftrace_format_##call,			\
@@ -953,7 +910,6 @@ end:									\
 	perf_swevent_put_recursion_context(rctx);			\
 end_recursion:								\
 	local_irq_restore(irq_flags);					\
-									\
 }
 
 #undef DEFINE_EVENT
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e51a1bc..7968762 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1724,7 +1724,7 @@ ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
 	return ftrace_match(str, regex, len, type);
 }
 
-static void ftrace_match_records(char *buff, int len, int enable)
+static int ftrace_match_records(char *buff, int len, int enable)
 {
 	unsigned int search_len;
 	struct ftrace_page *pg;
@@ -1733,6 +1733,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
 	char *search;
 	int type;
 	int not;
+	int found = 0;
 
 	flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
 	type = filter_parse_regex(buff, len, &search, &not);
@@ -1750,6 +1751,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
 				rec->flags &= ~flag;
 			else
 				rec->flags |= flag;
+			found = 1;
 		}
 		/*
 		 * Only enable filtering if we have a function that
@@ -1759,6 +1761,8 @@ static void ftrace_match_records(char *buff, int len, int enable)
 			ftrace_filtered = 1;
 	} while_for_each_ftrace_rec();
 	mutex_unlock(&ftrace_lock);
+
+	return found;
 }
 
 static int
@@ -1780,7 +1784,7 @@ ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
 		return 1;
 }
 
-static void ftrace_match_module_records(char *buff, char *mod, int enable)
+static int ftrace_match_module_records(char *buff, char *mod, int enable)
 {
 	unsigned search_len = 0;
 	struct ftrace_page *pg;
@@ -1789,6 +1793,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
 	char *search = buff;
 	unsigned long flag;
 	int not = 0;
+	int found = 0;
 
 	flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
 
@@ -1819,12 +1824,15 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
 				rec->flags &= ~flag;
 			else
 				rec->flags |= flag;
+			found = 1;
 		}
 		if (enable && (rec->flags & FTRACE_FL_FILTER))
 			ftrace_filtered = 1;
 
 	} while_for_each_ftrace_rec();
 	mutex_unlock(&ftrace_lock);
+
+	return found;
 }
 
 /*
@@ -1853,8 +1861,9 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
 	if (!strlen(mod))
 		return -EINVAL;
 
-	ftrace_match_module_records(func, mod, enable);
-	return 0;
+	if (ftrace_match_module_records(func, mod, enable))
+		return 0;
+	return -EINVAL;
 }
 
 static struct ftrace_func_command ftrace_mod_cmd = {
@@ -2151,8 +2160,9 @@ static int ftrace_process_regex(char *buff, int len, int enable)
 	func = strsep(&next, ":");
 
 	if (!next) {
-		ftrace_match_records(func, len, enable);
-		return 0;
+		if (ftrace_match_records(func, len, enable))
+			return 0;
+		return ret;
 	}
 
 	/* command found */
@@ -2198,10 +2208,9 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
 	    !trace_parser_cont(parser)) {
 		ret = ftrace_process_regex(parser->buffer,
 					   parser->idx, enable);
+		trace_parser_clear(parser);
 		if (ret)
 			goto out_unlock;
-
-		trace_parser_clear(parser);
 	}
 
 	ret = read;
@@ -2543,10 +2552,9 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 					exists = true;
 					break;
 				}
-			if (!exists) {
+			if (!exists)
 				array[(*idx)++] = rec->ip;
-				found = 1;
-			}
+			found = 1;
 		}
 	} while_for_each_ftrace_rec();
 
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index e06c6e3..9f4f565 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -14,7 +14,5 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/power.h>
 
-EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
-EXPORT_TRACEPOINT_SYMBOL_GPL(power_end);
 EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a1ca495..eccb4cf 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1193,9 +1193,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 	struct list_head *p;
 	unsigned i;
 
-	atomic_inc(&cpu_buffer->record_disabled);
-	synchronize_sched();
-
 	spin_lock_irq(&cpu_buffer->reader_lock);
 	rb_head_page_deactivate(cpu_buffer);
 
@@ -1211,12 +1208,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 		return;
 
 	rb_reset_cpu(cpu_buffer);
-	spin_unlock_irq(&cpu_buffer->reader_lock);
-
 	rb_check_pages(cpu_buffer);
 
-	atomic_dec(&cpu_buffer->record_disabled);
-
+	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
 static void
@@ -1227,9 +1221,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	struct list_head *p;
 	unsigned i;
 
-	atomic_inc(&cpu_buffer->record_disabled);
-	synchronize_sched();
-
 	spin_lock_irq(&cpu_buffer->reader_lock);
 	rb_head_page_deactivate(cpu_buffer);
 
@@ -1242,11 +1233,9 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 		list_add_tail(&bpage->list, cpu_buffer->pages);
 	}
 	rb_reset_cpu(cpu_buffer);
-	spin_unlock_irq(&cpu_buffer->reader_lock);
-
 	rb_check_pages(cpu_buffer);
 
-	atomic_dec(&cpu_buffer->record_disabled);
+	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
 /**
@@ -1254,11 +1243,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
  * @buffer: the buffer to resize.
  * @size: the new size.
  *
- * The tracer is responsible for making sure that the buffer is
- * not being used while changing the size.
- * Note: We may be able to change the above requirement by using
- *  RCU synchronizations.
- *
  * Minimum size is 2 * BUF_PAGE_SIZE.
  *
  * Returns -1 on failure.
@@ -1290,6 +1274,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 	if (size == buffer_size)
 		return size;
 
+	atomic_inc(&buffer->record_disabled);
+
+	/* Make sure all writers are done with this buffer. */
+	synchronize_sched();
+
 	mutex_lock(&buffer->mutex);
 	get_online_cpus();
 
@@ -1352,6 +1341,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 	put_online_cpus();
 	mutex_unlock(&buffer->mutex);
 
+	atomic_dec(&buffer->record_disabled);
+
 	return size;
 
  free_pages:
@@ -1361,6 +1352,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 	}
 	put_online_cpus();
 	mutex_unlock(&buffer->mutex);
+	atomic_dec(&buffer->record_disabled);
 	return -ENOMEM;
 
 	/*
@@ -1370,6 +1362,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
  out_fail:
 	put_online_cpus();
 	mutex_unlock(&buffer->mutex);
+	atomic_dec(&buffer->record_disabled);
 	return -1;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_resize);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 88bd9ae..ee61915 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -313,7 +313,6 @@ static const char *trace_options[] = {
 	"bin",
 	"block",
 	"stacktrace",
-	"sched-tree",
 	"trace_printk",
 	"ftrace_preempt",
 	"branch",
@@ -1151,6 +1150,22 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
 	__ftrace_trace_stack(tr->buffer, flags, skip, pc);
 }
 
+/**
+ * trace_dump_stack - record a stack back trace in the trace buffer
+ */
+void trace_dump_stack(void)
+{
+	unsigned long flags;
+
+	if (tracing_disabled || tracing_selftest_running)
+		return;
+
+	local_save_flags(flags);
+
+	/* skipping 3 traces, seems to get us at the caller of this function */
+	__ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
+}
+
 void
 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 {
@@ -2316,67 +2331,49 @@ static const struct file_operations tracing_cpumask_fops = {
 	.write		= tracing_cpumask_write,
 };
 
-static ssize_t
-tracing_trace_options_read(struct file *filp, char __user *ubuf,
-		       size_t cnt, loff_t *ppos)
+static int tracing_trace_options_show(struct seq_file *m, void *v)
 {
 	struct tracer_opt *trace_opts;
 	u32 tracer_flags;
-	int len = 0;
-	char *buf;
-	int r = 0;
 	int i;
 
-
-	/* calculate max size */
-	for (i = 0; trace_options[i]; i++) {
-		len += strlen(trace_options[i]);
-		len += 3; /* "no" and newline */
-	}
-
 	mutex_lock(&trace_types_lock);
 	tracer_flags = current_trace->flags->val;
 	trace_opts = current_trace->flags->opts;
 
-	/*
-	 * Increase the size with names of options specific
-	 * of the current tracer.
-	 */
-	for (i = 0; trace_opts[i].name; i++) {
-		len += strlen(trace_opts[i].name);
-		len += 3; /* "no" and newline */
-	}
-
-	/* +1 for \0 */
-	buf = kmalloc(len + 1, GFP_KERNEL);
-	if (!buf) {
-		mutex_unlock(&trace_types_lock);
-		return -ENOMEM;
-	}
-
 	for (i = 0; trace_options[i]; i++) {
 		if (trace_flags & (1 << i))
-			r += sprintf(buf + r, "%s\n", trace_options[i]);
+			seq_printf(m, "%s\n", trace_options[i]);
 		else
-			r += sprintf(buf + r, "no%s\n", trace_options[i]);
+			seq_printf(m, "no%s\n", trace_options[i]);
 	}
 
 	for (i = 0; trace_opts[i].name; i++) {
 		if (tracer_flags & trace_opts[i].bit)
-			r += sprintf(buf + r, "%s\n",
-				trace_opts[i].name);
+			seq_printf(m, "%s\n", trace_opts[i].name);
 		else
-			r += sprintf(buf + r, "no%s\n",
-				trace_opts[i].name);
+			seq_printf(m, "no%s\n", trace_opts[i].name);
 	}
 	mutex_unlock(&trace_types_lock);
 
-	WARN_ON(r >= len + 1);
+	return 0;
+}
 
-	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+static int __set_tracer_option(struct tracer *trace,
+			       struct tracer_flags *tracer_flags,
+			       struct tracer_opt *opts, int neg)
+{
+	int ret;
 
-	kfree(buf);
-	return r;
+	ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
+	if (ret)
+		return ret;
+
+	if (neg)
+		tracer_flags->val &= ~opts->bit;
+	else
+		tracer_flags->val |= opts->bit;
+	return 0;
 }
 
 /* Try to assign a tracer specific option */
@@ -2384,33 +2381,17 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
 {
 	struct tracer_flags *tracer_flags = trace->flags;
 	struct tracer_opt *opts = NULL;
-	int ret = 0, i = 0;
-	int len;
+	int i;
 
 	for (i = 0; tracer_flags->opts[i].name; i++) {
 		opts = &tracer_flags->opts[i];
-		len = strlen(opts->name);
 
-		if (strncmp(cmp, opts->name, len) == 0) {
-			ret = trace->set_flag(tracer_flags->val,
-				opts->bit, !neg);
-			break;
-		}
+		if (strcmp(cmp, opts->name) == 0)
+			return __set_tracer_option(trace, trace->flags,
+						   opts, neg);
 	}
-	/* Not found */
-	if (!tracer_flags->opts[i].name)
-		return -EINVAL;
-
-	/* Refused to handle */
-	if (ret)
-		return ret;
-
-	if (neg)
-		tracer_flags->val &= ~opts->bit;
-	else
-		tracer_flags->val |= opts->bit;
 
-	return 0;
+	return -EINVAL;
 }
 
 static void set_tracer_flags(unsigned int mask, int enabled)
@@ -2430,7 +2411,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
 			size_t cnt, loff_t *ppos)
 {
 	char buf[64];
-	char *cmp = buf;
+	char *cmp;
 	int neg = 0;
 	int ret;
 	int i;
@@ -2442,16 +2423,15 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
 		return -EFAULT;
 
 	buf[cnt] = 0;
+	cmp = strstrip(buf);
 
-	if (strncmp(buf, "no", 2) == 0) {
+	if (strncmp(cmp, "no", 2) == 0) {
 		neg = 1;
 		cmp += 2;
 	}
 
 	for (i = 0; trace_options[i]; i++) {
-		int len = strlen(trace_options[i]);
-
-		if (strncmp(cmp, trace_options[i], len) == 0) {
+		if (strcmp(cmp, trace_options[i]) == 0) {
 			set_tracer_flags(1 << i, !neg);
 			break;
 		}
@@ -2471,9 +2451,18 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
+static int tracing_trace_options_open(struct inode *inode, struct file *file)
+{
+	if (tracing_disabled)
+		return -ENODEV;
+	return single_open(file, tracing_trace_options_show, NULL);
+}
+
 static const struct file_operations tracing_iter_fops = {
-	.open		= tracing_open_generic,
-	.read		= tracing_trace_options_read,
+	.open		= tracing_trace_options_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
 	.write		= tracing_trace_options_write,
 };
 
@@ -3392,21 +3381,18 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
-static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf,
-				  size_t cnt, loff_t *ppos)
+static int tracing_clock_show(struct seq_file *m, void *v)
 {
-	char buf[64];
-	int bufiter = 0;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
-		bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter,
+		seq_printf(m,
 			"%s%s%s%s", i ? " " : "",
 			i == trace_clock_id ? "[" : "", trace_clocks[i].name,
 			i == trace_clock_id ? "]" : "");
-	bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n");
+	seq_putc(m, '\n');
 
-	return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter);
+	return 0;
 }
 
 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
@@ -3448,6 +3434,13 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
+static int tracing_clock_open(struct inode *inode, struct file *file)
+{
+	if (tracing_disabled)
+		return -ENODEV;
+	return single_open(file, tracing_clock_show, NULL);
+}
+
 static const struct file_operations tracing_max_lat_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_max_lat_read,
@@ -3486,8 +3479,10 @@ static const struct file_operations tracing_mark_fops = {
 };
 
 static const struct file_operations trace_clock_fops = {
-	.open		= tracing_open_generic,
-	.read		= tracing_clock_read,
+	.open		= tracing_clock_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
 	.write		= tracing_clock_write,
 };
 
@@ -3948,39 +3943,16 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	if (ret < 0)
 		return ret;
 
-	ret = 0;
-	switch (val) {
-	case 0:
-		/* do nothing if already cleared */
-		if (!(topt->flags->val & topt->opt->bit))
-			break;
-
-		mutex_lock(&trace_types_lock);
-		if (current_trace->set_flag)
-			ret = current_trace->set_flag(topt->flags->val,
-						      topt->opt->bit, 0);
-		mutex_unlock(&trace_types_lock);
-		if (ret)
-			return ret;
-		topt->flags->val &= ~topt->opt->bit;
-		break;
-	case 1:
-		/* do nothing if already set */
-		if (topt->flags->val & topt->opt->bit)
-			break;
+	if (val != 0 && val != 1)
+		return -EINVAL;
 
+	if (!!(topt->flags->val & topt->opt->bit) != val) {
 		mutex_lock(&trace_types_lock);
-		if (current_trace->set_flag)
-			ret = current_trace->set_flag(topt->flags->val,
-						      topt->opt->bit, 1);
+		ret = __set_tracer_option(current_trace, topt->flags,
+					  topt->opt, val);
 		mutex_unlock(&trace_types_lock);
 		if (ret)
 			return ret;
-		topt->flags->val |= topt->opt->bit;
-		break;
-
-	default:
-		return -EINVAL;
 	}
 
 	*ppos += cnt;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 7fa33ca..1b18cb2 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -597,18 +597,17 @@ enum trace_iterator_flags {
 	TRACE_ITER_BIN			= 0x40,
 	TRACE_ITER_BLOCK		= 0x80,
 	TRACE_ITER_STACKTRACE		= 0x100,
-	TRACE_ITER_SCHED_TREE		= 0x200,
-	TRACE_ITER_PRINTK		= 0x400,
-	TRACE_ITER_PREEMPTONLY		= 0x800,
-	TRACE_ITER_BRANCH		= 0x1000,
-	TRACE_ITER_ANNOTATE		= 0x2000,
-	TRACE_ITER_USERSTACKTRACE       = 0x4000,
-	TRACE_ITER_SYM_USEROBJ          = 0x8000,
-	TRACE_ITER_PRINTK_MSGONLY	= 0x10000,
-	TRACE_ITER_CONTEXT_INFO		= 0x20000, /* Print pid/cpu/time */
-	TRACE_ITER_LATENCY_FMT		= 0x40000,
-	TRACE_ITER_SLEEP_TIME		= 0x80000,
-	TRACE_ITER_GRAPH_TIME		= 0x100000,
+	TRACE_ITER_PRINTK		= 0x200,
+	TRACE_ITER_PREEMPTONLY		= 0x400,
+	TRACE_ITER_BRANCH		= 0x800,
+	TRACE_ITER_ANNOTATE		= 0x1000,
+	TRACE_ITER_USERSTACKTRACE       = 0x2000,
+	TRACE_ITER_SYM_USEROBJ          = 0x4000,
+	TRACE_ITER_PRINTK_MSGONLY	= 0x8000,
+	TRACE_ITER_CONTEXT_INFO		= 0x10000, /* Print pid/cpu/time */
+	TRACE_ITER_LATENCY_FMT		= 0x20000,
+	TRACE_ITER_SLEEP_TIME		= 0x40000,
+	TRACE_ITER_GRAPH_TIME		= 0x80000,
 };
 
 /*
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index d9c60f8..9e25573 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -25,7 +25,7 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
 	char *buf;
 	int ret = -ENOMEM;
 
-	if (atomic_inc_return(&event->profile_count))
+	if (event->profile_count++ > 0)
 		return 0;
 
 	if (!total_profile_count) {
@@ -56,7 +56,7 @@ fail_buf_nmi:
 		perf_trace_buf = NULL;
 	}
 fail_buf:
-	atomic_dec(&event->profile_count);
+	event->profile_count--;
 
 	return ret;
 }
@@ -83,7 +83,7 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
 {
 	char *buf, *nmi_buf;
 
-	if (!atomic_add_negative(-1, &event->profile_count))
+	if (--event->profile_count > 0)
 		return;
 
 	event->profile_disable(event);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1d18315..189b09b 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(trace_define_field);
 	if (ret)							\
 		return ret;
 
-int trace_define_common_fields(struct ftrace_event_call *call)
+static int trace_define_common_fields(struct ftrace_event_call *call)
 {
 	int ret;
 	struct trace_entry ent;
@@ -91,7 +91,6 @@ int trace_define_common_fields(struct ftrace_event_call *call)
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(trace_define_common_fields);
 
 void trace_destroy_fields(struct ftrace_event_call *call)
 {
@@ -105,9 +104,25 @@ void trace_destroy_fields(struct ftrace_event_call *call)
 	}
 }
 
-static void ftrace_event_enable_disable(struct ftrace_event_call *call,
+int trace_event_raw_init(struct ftrace_event_call *call)
+{
+	int id;
+
+	id = register_ftrace_event(call->event);
+	if (!id)
+		return -ENODEV;
+	call->id = id;
+	INIT_LIST_HEAD(&call->fields);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(trace_event_raw_init);
+
+static int ftrace_event_enable_disable(struct ftrace_event_call *call,
 					int enable)
 {
+	int ret = 0;
+
 	switch (enable) {
 	case 0:
 		if (call->enabled) {
@@ -118,12 +133,20 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
 		break;
 	case 1:
 		if (!call->enabled) {
-			call->enabled = 1;
 			tracing_start_cmdline_record();
-			call->regfunc(call);
+			ret = call->regfunc(call);
+			if (ret) {
+				tracing_stop_cmdline_record();
+				pr_info("event trace: Could not enable event "
+					"%s\n", call->name);
+				break;
+			}
+			call->enabled = 1;
 		}
 		break;
 	}
+
+	return ret;
 }
 
 static void ftrace_clear_events(void)
@@ -402,7 +425,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	case 0:
 	case 1:
 		mutex_lock(&event_mutex);
-		ftrace_event_enable_disable(call, val);
+		ret = ftrace_event_enable_disable(call, val);
 		mutex_unlock(&event_mutex);
 		break;
 
@@ -412,7 +435,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 
 	*ppos += cnt;
 
-	return cnt;
+	return ret ? ret : cnt;
 }
 
 static ssize_t
@@ -913,7 +936,9 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
 		 		  id);
 
 	if (call->define_fields) {
-		ret = call->define_fields(call);
+		ret = trace_define_common_fields(call);
+		if (!ret)
+			ret = call->define_fields(call);
 		if (ret < 0) {
 			pr_warning("Could not initialize trace point"
 				   " events/%s\n", call->name);
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index dff8c84..458e5bf 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -184,10 +184,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call)	\
 	struct struct_name field;					\
 	int ret;							\
 									\
-	ret = trace_define_common_fields(event_call);			\
-	if (ret)							\
-		return ret;						\
-									\
 	tstruct;							\
 									\
 	return ret;							\
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 3aa7eaa..2974bc7 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -151,6 +151,8 @@ check_critical_timing(struct trace_array *tr,
 		goto out_unlock;
 
 	trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
+	/* Skip 5 functions to get to the irq/preempt enable function */
+	__trace_stack(tr, flags, 5, pc);
 
 	if (data->critical_sequence != max_sequence)
 		goto out_unlock;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index b52d397..7ecab06 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1132,10 +1132,6 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
 	struct kprobe_trace_entry field;
 	struct trace_probe *tp = (struct trace_probe *)event_call->data;
 
-	ret = trace_define_common_fields(event_call);
-	if (ret)
-		return ret;
-
 	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
 	DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
 	/* Set argument names as fields */
@@ -1150,10 +1146,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
 	struct kretprobe_trace_entry field;
 	struct trace_probe *tp = (struct trace_probe *)event_call->data;
 
-	ret = trace_define_common_fields(event_call);
-	if (ret)
-		return ret;
-
 	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
 	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
 	DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
@@ -1453,7 +1445,6 @@ static int register_probe_event(struct trace_probe *tp)
 	call->unregfunc = probe_event_disable;
 
 #ifdef CONFIG_EVENT_PROFILE
-	atomic_set(&call->profile_count, -1);
 	call->profile_enable = probe_profile_enable;
 	call->profile_disable = probe_profile_disable;
 #endif
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index acb87d4..faf37fa 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -236,7 +236,8 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
 	mutex_lock(&ksym_tracer_mutex);
 
 	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
-		ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr);
+		ret = trace_seq_printf(s, "%pS:",
+				(void *)(unsigned long)entry->attr.bp_addr);
 		if (entry->attr.bp_type == HW_BREAKPOINT_R)
 			ret = trace_seq_puts(s, "r--\n");
 		else if (entry->attr.bp_type == HW_BREAKPOINT_W)
@@ -278,21 +279,20 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 {
 	struct trace_ksym *entry;
 	struct hlist_node *node;
-	char *input_string, *ksymname = NULL;
+	char *buf, *input_string, *ksymname = NULL;
 	unsigned long ksym_addr = 0;
 	int ret, op, changed = 0;
 
-	input_string = kzalloc(count + 1, GFP_KERNEL);
-	if (!input_string)
+	buf = kzalloc(count + 1, GFP_KERNEL);
+	if (!buf)
 		return -ENOMEM;
 
-	if (copy_from_user(input_string, buffer, count)) {
-		kfree(input_string);
-		return -EFAULT;
-	}
-	input_string[count] = '\0';
+	ret = -EFAULT;
+	if (copy_from_user(buf, buffer, count))
+		goto out;
 
-	strstrip(input_string);
+	buf[count] = '\0';
+	input_string = strstrip(buf);
 
 	/*
 	 * Clear all breakpoints if:
@@ -300,18 +300,16 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 	 * 2: echo 0 > ksym_trace_filter
 	 * 3: echo "*:---" > ksym_trace_filter
 	 */
-	if (!input_string[0] || !strcmp(input_string, "0") ||
-	    !strcmp(input_string, "*:---")) {
+	if (!buf[0] || !strcmp(buf, "0") ||
+	    !strcmp(buf, "*:---")) {
 		__ksym_trace_reset();
-		kfree(input_string);
-		return count;
+		ret = 0;
+		goto out;
 	}
 
 	ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
-	if (ret < 0) {
-		kfree(input_string);
-		return ret;
-	}
+	if (ret < 0)
+		goto out;
 
 	mutex_lock(&ksym_tracer_mutex);
 
@@ -322,7 +320,7 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 			if (entry->attr.bp_type != op)
 				changed = 1;
 			else
-				goto out;
+				goto out_unlock;
 			break;
 		}
 	}
@@ -337,28 +335,24 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 			if (IS_ERR(entry->ksym_hbp))
 				ret = PTR_ERR(entry->ksym_hbp);
 			else
-				goto out;
+				goto out_unlock;
 		}
 		/* Error or "symbol:---" case: drop it */
 		ksym_filter_entry_count--;
 		hlist_del_rcu(&(entry->ksym_hlist));
 		synchronize_rcu();
 		kfree(entry);
-		goto out;
+		goto out_unlock;
 	} else {
 		/* Check for malformed request: (4) */
-		if (op == 0)
-			goto out;
-		ret = process_new_ksym_entry(ksymname, op, ksym_addr);
+		if (op)
+			ret = process_new_ksym_entry(ksymname, op, ksym_addr);
 	}
-out:
+out_unlock:
 	mutex_unlock(&ksym_tracer_mutex);
-
-	kfree(input_string);
-
-	if (!ret)
-		ret = count;
-	return ret;
+out:
+	kfree(buf);
+	return !ret ? count : ret;
 }
 
 static const struct file_operations ksym_tracing_fops = {
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 57501d9..75289f3 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -217,10 +217,6 @@ int syscall_enter_define_fields(struct ftrace_event_call *call)
 	int i;
 	int offset = offsetof(typeof(trace), args);
 
-	ret = trace_define_common_fields(call);
-	if (ret)
-		return ret;
-
 	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
 	if (ret)
 		return ret;
@@ -241,10 +237,6 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
 	struct syscall_trace_exit trace;
 	int ret;
 
-	ret = trace_define_common_fields(call);
-	if (ret)
-		return ret;
-
 	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
 	if (ret)
 		return ret;
@@ -333,10 +325,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
 	mutex_lock(&syscall_trace_lock);
 	if (!sys_refcount_enter)
 		ret = register_trace_sys_enter(ftrace_syscall_enter);
-	if (ret) {
-		pr_info("event trace: Could not activate"
-				"syscall entry trace point");
-	} else {
+	if (!ret) {
 		set_bit(num, enabled_enter_syscalls);
 		sys_refcount_enter++;
 	}
@@ -370,10 +359,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
 	mutex_lock(&syscall_trace_lock);
 	if (!sys_refcount_exit)
 		ret = register_trace_sys_exit(ftrace_syscall_exit);
-	if (ret) {
-		pr_info("event trace: Could not activate"
-				"syscall exit trace point");
-	} else {
+	if (!ret) {
 		set_bit(num, enabled_exit_syscalls);
 		sys_refcount_exit++;
 	}

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-12-10 19:40 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-12-10 19:40 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt,
	=?unknown-8bit?B?RnLDqWTDqXJpYw==?= Weisbecker, Andrew Morton

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Carsten Emde (1):
      tracing: Remove comparing of NULL to va_list in trace_array_vprintk()

Jiri Olsa (1):
      tracing: Fix function graph trace_pipe to properly display failed entries

Johannes Berg (1):
      tracing: Add full state to trace_seq

Steven Rostedt (3):
      tracing: Add pipe_close interface
      tracing: Only call pipe_close if pipe_close is defined
      tracing: Buffer the output of seq_file in case of filled buffer


 include/linux/ftrace_event.h         |    1 +
 include/linux/trace_seq.h            |    7 +-
 kernel/trace/trace.c                 |   57 ++++++++++--
 kernel/trace/trace.h                 |    2 +
 kernel/trace/trace_functions_graph.c |  165 +++++++++++++++++++++++++++-------
 kernel/trace/trace_output.c          |   75 +++++++++++++---
 6 files changed, 248 insertions(+), 59 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 47bbdf9..38f8d65 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -57,6 +57,7 @@ struct trace_iterator {
 	/* The below is zeroed out in pipe_read */
 	struct trace_seq	seq;
 	struct trace_entry	*ent;
+	int			leftover;
 	int			cpu;
 	u64			ts;
 
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index 09077f6..5cf397c 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -14,6 +14,7 @@ struct trace_seq {
 	unsigned char		buffer[PAGE_SIZE];
 	unsigned int		len;
 	unsigned int		readpos;
+	int			full;
 };
 
 static inline void
@@ -21,6 +22,7 @@ trace_seq_init(struct trace_seq *s)
 {
 	s->len = 0;
 	s->readpos = 0;
+	s->full = 0;
 }
 
 /*
@@ -33,7 +35,7 @@ extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
 	__attribute__ ((format (printf, 2, 0)));
 extern int
 trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
-extern void trace_print_seq(struct seq_file *m, struct trace_seq *s);
+extern int trace_print_seq(struct seq_file *m, struct trace_seq *s);
 extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
 				 size_t cnt);
 extern int trace_seq_puts(struct trace_seq *s, const char *str);
@@ -55,8 +57,9 @@ trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 	return 0;
 }
 
-static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+static inline int trace_print_seq(struct seq_file *m, struct trace_seq *s)
 {
+	return 0;
 }
 static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
 				 size_t cnt)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 874f289..88bd9ae 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1361,11 +1361,7 @@ int trace_array_vprintk(struct trace_array *tr,
 	pause_graph_tracing();
 	raw_local_irq_save(irq_flags);
 	__raw_spin_lock(&trace_buf_lock);
-	if (args == NULL) {
-		strncpy(trace_buf, fmt, TRACE_BUF_SIZE);
-		len = strlen(trace_buf);
-	} else
-		len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
+	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
 
 	size = sizeof(*entry) + len + 1;
 	buffer = tr->buffer;
@@ -1516,6 +1512,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
 	int i = (int)*pos;
 	void *ent;
 
+	WARN_ON_ONCE(iter->leftover);
+
 	(*pos)++;
 
 	/* can't go backwards */
@@ -1614,8 +1612,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 			;
 
 	} else {
-		l = *pos - 1;
-		p = s_next(m, p, &l);
+		/*
+		 * If we overflowed the seq_file before, then we want
+		 * to just reuse the trace_seq buffer again.
+		 */
+		if (iter->leftover)
+			p = iter;
+		else {
+			l = *pos - 1;
+			p = s_next(m, p, &l);
+		}
 	}
 
 	trace_event_read_lock();
@@ -1923,6 +1929,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
 static int s_show(struct seq_file *m, void *v)
 {
 	struct trace_iterator *iter = v;
+	int ret;
 
 	if (iter->ent == NULL) {
 		if (iter->tr) {
@@ -1942,9 +1949,27 @@ static int s_show(struct seq_file *m, void *v)
 			if (!(trace_flags & TRACE_ITER_VERBOSE))
 				print_func_help_header(m);
 		}
+	} else if (iter->leftover) {
+		/*
+		 * If we filled the seq_file buffer earlier, we
+		 * want to just show it now.
+		 */
+		ret = trace_print_seq(m, &iter->seq);
+
+		/* ret should this time be zero, but you never know */
+		iter->leftover = ret;
+
 	} else {
 		print_trace_line(iter);
-		trace_print_seq(m, &iter->seq);
+		ret = trace_print_seq(m, &iter->seq);
+		/*
+		 * If we overflow the seq_file buffer, then it will
+		 * ask us for this data again at start up.
+		 * Use that instead.
+		 *  ret is 0 if seq_file write succeeded.
+		 *        -1 otherwise.
+		 */
+		iter->leftover = ret;
 	}
 
 	return 0;
@@ -2898,6 +2923,10 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 	else
 		cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
 
+
+	if (iter->trace->pipe_close)
+		iter->trace->pipe_close(iter);
+
 	mutex_unlock(&trace_types_lock);
 
 	free_cpumask_var(iter->started);
@@ -3320,6 +3349,16 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
+static int mark_printk(const char *fmt, ...)
+{
+	int ret;
+	va_list args;
+	va_start(args, fmt);
+	ret = trace_vprintk(0, fmt, args);
+	va_end(args);
+	return ret;
+}
+
 static ssize_t
 tracing_mark_write(struct file *filp, const char __user *ubuf,
 					size_t cnt, loff_t *fpos)
@@ -3346,7 +3385,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
 	} else
 		buf[cnt] = '\0';
 
-	cnt = trace_vprintk(0, buf, NULL);
+	cnt = mark_printk("%s", buf);
 	kfree(buf);
 	*fpos += cnt;
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1d7f483..7fa33ca 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -272,6 +272,7 @@ struct tracer_flags {
  * @pipe_open: called when the trace_pipe file is opened
  * @wait_pipe: override how the user waits for traces on trace_pipe
  * @close: called when the trace file is released
+ * @pipe_close: called when the trace_pipe file is released
  * @read: override the default read callback on trace_pipe
  * @splice_read: override the default splice_read callback on trace_pipe
  * @selftest: selftest to run on boot (see trace_selftest.c)
@@ -290,6 +291,7 @@ struct tracer {
 	void			(*pipe_open)(struct trace_iterator *iter);
 	void			(*wait_pipe)(struct trace_iterator *iter);
 	void			(*close)(struct trace_iterator *iter);
+	void			(*pipe_close)(struct trace_iterator *iter);
 	ssize_t			(*read)(struct trace_iterator *iter,
 					struct file *filp, char __user *ubuf,
 					size_t cnt, loff_t *ppos);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 45e6c01..a43d009 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,9 +14,20 @@
 #include "trace.h"
 #include "trace_output.h"
 
-struct fgraph_data {
+struct fgraph_cpu_data {
 	pid_t		last_pid;
 	int		depth;
+	int		ignore;
+};
+
+struct fgraph_data {
+	struct fgraph_cpu_data		*cpu_data;
+
+	/* Place to preserve last processed entry. */
+	struct ftrace_graph_ent_entry	ent;
+	struct ftrace_graph_ret_entry	ret;
+	int				failed;
+	int				cpu;
 };
 
 #define TRACE_GRAPH_INDENT	2
@@ -384,7 +395,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
 	if (!data)
 		return TRACE_TYPE_HANDLED;
 
-	last_pid = &(per_cpu_ptr(data, cpu)->last_pid);
+	last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
 
 	if (*last_pid == pid)
 		return TRACE_TYPE_HANDLED;
@@ -435,26 +446,49 @@ static struct ftrace_graph_ret_entry *
 get_return_for_leaf(struct trace_iterator *iter,
 		struct ftrace_graph_ent_entry *curr)
 {
-	struct ring_buffer_iter *ring_iter;
+	struct fgraph_data *data = iter->private;
+	struct ring_buffer_iter *ring_iter = NULL;
 	struct ring_buffer_event *event;
 	struct ftrace_graph_ret_entry *next;
 
-	ring_iter = iter->buffer_iter[iter->cpu];
+	/*
+	 * If the previous output failed to write to the seq buffer,
+	 * then we just reuse the data from before.
+	 */
+	if (data && data->failed) {
+		curr = &data->ent;
+		next = &data->ret;
+	} else {
 
-	/* First peek to compare current entry and the next one */
-	if (ring_iter)
-		event = ring_buffer_iter_peek(ring_iter, NULL);
-	else {
-	/* We need to consume the current entry to see the next one */
-		ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
-		event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
-					NULL);
-	}
+		ring_iter = iter->buffer_iter[iter->cpu];
+
+		/* First peek to compare current entry and the next one */
+		if (ring_iter)
+			event = ring_buffer_iter_peek(ring_iter, NULL);
+		else {
+			/*
+			 * We need to consume the current entry to see
+			 * the next one.
+			 */
+			ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+			event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
+						 NULL);
+		}
 
-	if (!event)
-		return NULL;
+		if (!event)
+			return NULL;
+
+		next = ring_buffer_event_data(event);
 
-	next = ring_buffer_event_data(event);
+		if (data) {
+			/*
+			 * Save current and next entries for later reference
+			 * if the output fails.
+			 */
+			data->ent = *curr;
+			data->ret = *next;
+		}
+	}
 
 	if (next->ent.type != TRACE_GRAPH_RET)
 		return NULL;
@@ -640,7 +674,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 
 	if (data) {
 		int cpu = iter->cpu;
-		int *depth = &(per_cpu_ptr(data, cpu)->depth);
+		int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
 
 		/*
 		 * Comments display at + 1 to depth. Since
@@ -688,7 +722,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
 
 	if (data) {
 		int cpu = iter->cpu;
-		int *depth = &(per_cpu_ptr(data, cpu)->depth);
+		int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
 
 		*depth = call->depth;
 	}
@@ -782,19 +816,34 @@ static enum print_line_t
 print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
 			struct trace_iterator *iter)
 {
-	int cpu = iter->cpu;
+	struct fgraph_data *data = iter->private;
 	struct ftrace_graph_ent *call = &field->graph_ent;
 	struct ftrace_graph_ret_entry *leaf_ret;
+	static enum print_line_t ret;
+	int cpu = iter->cpu;
 
 	if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
 		return TRACE_TYPE_PARTIAL_LINE;
 
 	leaf_ret = get_return_for_leaf(iter, field);
 	if (leaf_ret)
-		return print_graph_entry_leaf(iter, field, leaf_ret, s);
+		ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
 	else
-		return print_graph_entry_nested(iter, field, s, cpu);
+		ret = print_graph_entry_nested(iter, field, s, cpu);
 
+	if (data) {
+		/*
+		 * If we failed to write our output, then we need to make
+		 * note of it. Because we already consumed our entry.
+		 */
+		if (s->full) {
+			data->failed = 1;
+			data->cpu = cpu;
+		} else
+			data->failed = 0;
+	}
+
+	return ret;
 }
 
 static enum print_line_t
@@ -810,7 +859,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 
 	if (data) {
 		int cpu = iter->cpu;
-		int *depth = &(per_cpu_ptr(data, cpu)->depth);
+		int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
 
 		/*
 		 * Comments display at + 1 to depth. This is the
@@ -873,7 +922,7 @@ print_graph_comment(struct trace_seq *s,  struct trace_entry *ent,
 	int i;
 
 	if (data)
-		depth = per_cpu_ptr(data, iter->cpu)->depth;
+		depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
 
 	if (print_graph_prologue(iter, s, 0, 0))
 		return TRACE_TYPE_PARTIAL_LINE;
@@ -941,8 +990,33 @@ print_graph_comment(struct trace_seq *s,  struct trace_entry *ent,
 enum print_line_t
 print_graph_function(struct trace_iterator *iter)
 {
+	struct ftrace_graph_ent_entry *field;
+	struct fgraph_data *data = iter->private;
 	struct trace_entry *entry = iter->ent;
 	struct trace_seq *s = &iter->seq;
+	int cpu = iter->cpu;
+	int ret;
+
+	if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
+		per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
+		return TRACE_TYPE_HANDLED;
+	}
+
+	/*
+	 * If the last output failed, there's a possibility we need
+	 * to print out the missing entry which would never go out.
+	 */
+	if (data && data->failed) {
+		field = &data->ent;
+		iter->cpu = data->cpu;
+		ret = print_graph_entry(field, s, iter);
+		if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
+			per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
+			ret = TRACE_TYPE_NO_CONSUME;
+		}
+		iter->cpu = cpu;
+		return ret;
+	}
 
 	switch (entry->type) {
 	case TRACE_GRAPH_ENT: {
@@ -952,7 +1026,7 @@ print_graph_function(struct trace_iterator *iter)
 		 * sizeof(struct ftrace_graph_ent_entry) is very small,
 		 * it can be safely saved at the stack.
 		 */
-		struct ftrace_graph_ent_entry *field, saved;
+		struct ftrace_graph_ent_entry saved;
 		trace_assign_type(field, entry);
 		saved = *field;
 		return print_graph_entry(&saved, s, iter);
@@ -1030,31 +1104,54 @@ static void print_graph_headers(struct seq_file *s)
 static void graph_trace_open(struct trace_iterator *iter)
 {
 	/* pid and depth on the last trace processed */
-	struct fgraph_data *data = alloc_percpu(struct fgraph_data);
+	struct fgraph_data *data;
 	int cpu;
 
+	iter->private = NULL;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
-		pr_warning("function graph tracer: not enough memory\n");
-	else
-		for_each_possible_cpu(cpu) {
-			pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid);
-			int *depth = &(per_cpu_ptr(data, cpu)->depth);
-			*pid = -1;
-			*depth = 0;
-		}
+		goto out_err;
+
+	data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
+	if (!data->cpu_data)
+		goto out_err_free;
+
+	for_each_possible_cpu(cpu) {
+		pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
+		int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
+		int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
+		*pid = -1;
+		*depth = 0;
+		*ignore = 0;
+	}
 
 	iter->private = data;
+
+	return;
+
+ out_err_free:
+	kfree(data);
+ out_err:
+	pr_warning("function graph tracer: not enough memory\n");
 }
 
 static void graph_trace_close(struct trace_iterator *iter)
 {
-	free_percpu(iter->private);
+	struct fgraph_data *data = iter->private;
+
+	if (data) {
+		free_percpu(data->cpu_data);
+		kfree(data);
+	}
 }
 
 static struct tracer graph_trace __read_mostly = {
 	.name		= "function_graph",
 	.open		= graph_trace_open,
+	.pipe_open	= graph_trace_open,
 	.close		= graph_trace_close,
+	.pipe_close	= graph_trace_close,
 	.wait_pipe	= poll_wait_pipe,
 	.init		= graph_trace_init,
 	.reset		= graph_trace_reset,
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index b6c12c6..8e46b33 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -23,13 +23,21 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
 static int next_event_type = __TRACE_LAST_TYPE + 1;
 
-void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+int trace_print_seq(struct seq_file *m, struct trace_seq *s)
 {
 	int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
+	int ret;
+
+	ret = seq_write(m, s->buffer, len);
 
-	seq_write(m, s->buffer, len);
+	/*
+	 * Only reset this buffer if we successfully wrote to the
+	 * seq_file buffer.
+	 */
+	if (!ret)
+		trace_seq_init(s);
 
-	trace_seq_init(s);
+	return ret;
 }
 
 enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
@@ -85,7 +93,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 	va_list ap;
 	int ret;
 
-	if (!len)
+	if (s->full || !len)
 		return 0;
 
 	va_start(ap, fmt);
@@ -93,8 +101,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 	va_end(ap);
 
 	/* If we can't write it all, don't bother writing anything */
-	if (ret >= len)
+	if (ret >= len) {
+		s->full = 1;
 		return 0;
+	}
 
 	s->len += ret;
 
@@ -119,14 +129,16 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
 	int len = (PAGE_SIZE - 1) - s->len;
 	int ret;
 
-	if (!len)
+	if (s->full || !len)
 		return 0;
 
 	ret = vsnprintf(s->buffer + s->len, len, fmt, args);
 
 	/* If we can't write it all, don't bother writing anything */
-	if (ret >= len)
+	if (ret >= len) {
+		s->full = 1;
 		return 0;
+	}
 
 	s->len += ret;
 
@@ -139,14 +151,16 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 	int len = (PAGE_SIZE - 1) - s->len;
 	int ret;
 
-	if (!len)
+	if (s->full || !len)
 		return 0;
 
 	ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
 
 	/* If we can't write it all, don't bother writing anything */
-	if (ret >= len)
+	if (ret >= len) {
+		s->full = 1;
 		return 0;
+	}
 
 	s->len += ret;
 
@@ -167,8 +181,13 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
 {
 	int len = strlen(str);
 
-	if (len > ((PAGE_SIZE - 1) - s->len))
+	if (s->full)
+		return 0;
+
+	if (len > ((PAGE_SIZE - 1) - s->len)) {
+		s->full = 1;
 		return 0;
+	}
 
 	memcpy(s->buffer + s->len, str, len);
 	s->len += len;
@@ -178,9 +197,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
 
 int trace_seq_putc(struct trace_seq *s, unsigned char c)
 {
-	if (s->len >= (PAGE_SIZE - 1))
+	if (s->full)
 		return 0;
 
+	if (s->len >= (PAGE_SIZE - 1)) {
+		s->full = 1;
+		return 0;
+	}
+
 	s->buffer[s->len++] = c;
 
 	return 1;
@@ -188,9 +212,14 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
 
 int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
 {
-	if (len > ((PAGE_SIZE - 1) - s->len))
+	if (s->full)
 		return 0;
 
+	if (len > ((PAGE_SIZE - 1) - s->len)) {
+		s->full = 1;
+		return 0;
+	}
+
 	memcpy(s->buffer + s->len, mem, len);
 	s->len += len;
 
@@ -203,6 +232,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
 	const unsigned char *data = mem;
 	int i, j;
 
+	if (s->full)
+		return 0;
+
 #ifdef __BIG_ENDIAN
 	for (i = 0, j = 0; i < len; i++) {
 #else
@@ -220,8 +252,13 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
 {
 	void *ret;
 
-	if (len > ((PAGE_SIZE - 1) - s->len))
+	if (s->full)
+		return 0;
+
+	if (len > ((PAGE_SIZE - 1) - s->len)) {
+		s->full = 1;
 		return NULL;
+	}
 
 	ret = s->buffer + s->len;
 	s->len += len;
@@ -233,8 +270,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
 {
 	unsigned char *p;
 
-	if (s->len >= (PAGE_SIZE - 1))
+	if (s->full)
+		return 0;
+
+	if (s->len >= (PAGE_SIZE - 1)) {
+		s->full = 1;
 		return 0;
+	}
+
 	p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
 	if (!IS_ERR(p)) {
 		p = mangle_path(s->buffer + s->len, p, "\n");
@@ -247,6 +290,7 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
 		return 1;
 	}
 
+	s->full = 1;
 	return 0;
 }
 
@@ -373,6 +417,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
 	unsigned long vmstart = 0;
 	int ret = 1;
 
+	if (s->full)
+		return 0;
+
 	if (mm) {
 		const struct vm_area_struct *vma;
 

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-11-04 15:49 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-11-04 15:49 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt, Frédéric Weisbecker,
	Andrew Morton

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Lai Jiangshan (1):
      ring-buffer: Synchronize resizing buffer with reader lock

Li Zefan (1):
      ftrace: Fix unmatched locking in ftrace_regex_write()


 kernel/trace/ftrace.c      |    6 +++---
 kernel/trace/ring_buffer.c |    2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9c451a1..6dc4e5e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2222,15 +2222,15 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
 		ret = ftrace_process_regex(parser->buffer,
 					   parser->idx, enable);
 		if (ret)
-			goto out;
+			goto out_unlock;
 
 		trace_parser_clear(parser);
 	}
 
 	ret = read;
-
+out_unlock:
 	mutex_unlock(&ftrace_regex_lock);
-out:
+
 	return ret;
 }
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3ffa502..5dd017f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1193,6 +1193,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 	atomic_inc(&cpu_buffer->record_disabled);
 	synchronize_sched();
 
+	spin_lock_irq(&cpu_buffer->reader_lock);
 	rb_head_page_deactivate(cpu_buffer);
 
 	for (i = 0; i < nr_pages; i++) {
@@ -1207,6 +1208,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 		return;
 
 	rb_reset_cpu(cpu_buffer);
+	spin_unlock_irq(&cpu_buffer->reader_lock);
 
 	rb_check_pages(cpu_buffer);
 

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-11-01 15:26 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-11-01 15:26 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt, Frédéric Weisbecker,
	Andrew Morton

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Jiri Olsa (4):
      tracing: Update *ppos instead of filp->f_pos
      tracing: Fix trace_seq_printf() return value
      tracing: Fix comment typo and documentation example
      tracing: Remove cpu arg from the rb_time_stamp() function


 Documentation/trace/ftrace.txt |    2 ++
 include/linux/trace_seq.h      |    2 +-
 kernel/trace/ftrace.c          |    2 +-
 kernel/trace/ring_buffer.c     |   12 ++++++------
 kernel/trace/trace.c           |    8 ++++----
 kernel/trace/trace_output.c    |    5 ++++-
 6 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 957b22f..8179692 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1231,6 +1231,7 @@ something like this simple program:
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
+#include <string.h>
 
 #define _STR(x) #x
 #define STR(x) _STR(x)
@@ -1265,6 +1266,7 @@ const char *find_debugfs(void)
                return NULL;
        }
 
+       strcat(debugfs, "/tracing/");
        debugfs_found = 1;
 
        return debugfs;
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index c134dd1..09077f6 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -7,7 +7,7 @@
 
 /*
  * Trace sequences are used to allow a function to call several other functions
- * to create a string of data to use (up to a max of PAGE_SIZE.
+ * to create a string of data to use (up to a max of PAGE_SIZE).
  */
 
 struct trace_seq {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 37ba67e..9c451a1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -740,7 +740,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
  out:
 	mutex_unlock(&ftrace_profile_lock);
 
-	filp->f_pos += cnt;
+	*ppos += cnt;
 
 	return cnt;
 }
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index d4ff019..3ffa502 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -483,7 +483,7 @@ struct ring_buffer_iter {
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
-static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu)
+static inline u64 rb_time_stamp(struct ring_buffer *buffer)
 {
 	/* shift to debug/test normalization and TIME_EXTENTS */
 	return buffer->clock() << DEBUG_SHIFT;
@@ -494,7 +494,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
 	u64 time;
 
 	preempt_disable_notrace();
-	time = rb_time_stamp(buffer, cpu);
+	time = rb_time_stamp(buffer);
 	preempt_enable_no_resched_notrace();
 
 	return time;
@@ -599,7 +599,7 @@ static struct list_head *rb_list_head(struct list_head *list)
 }
 
 /*
- * rb_is_head_page - test if the give page is the head page
+ * rb_is_head_page - test if the given page is the head page
  *
  * Because the reader may move the head_page pointer, we can
  * not trust what the head page is (it may be pointing to
@@ -1868,7 +1868,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 		 * Nested commits always have zero deltas, so
 		 * just reread the time stamp
 		 */
-		*ts = rb_time_stamp(buffer, cpu_buffer->cpu);
+		*ts = rb_time_stamp(buffer);
 		next_page->page->time_stamp = *ts;
 	}
 
@@ -2111,7 +2111,7 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
 		goto out_fail;
 
-	ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
+	ts = rb_time_stamp(cpu_buffer->buffer);
 
 	/*
 	 * Only the first commit can update the timestamp.
@@ -2681,7 +2681,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
 EXPORT_SYMBOL_GPL(ring_buffer_entries);
 
 /**
- * ring_buffer_overrun_cpu - get the number of overruns in buffer
+ * ring_buffer_overruns - get the number of overruns in buffer
  * @buffer: The ring buffer
  *
  * Returns the total number of overruns in the ring buffer
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c820b03..b20d3ec 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2440,7 +2440,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
 			return ret;
 	}
 
-	filp->f_pos += cnt;
+	*ppos += cnt;
 
 	return cnt;
 }
@@ -2582,7 +2582,7 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
 	}
 	mutex_unlock(&trace_types_lock);
 
-	filp->f_pos += cnt;
+	*ppos += cnt;
 
 	return cnt;
 }
@@ -2764,7 +2764,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
 	if (err)
 		return err;
 
-	filp->f_pos += ret;
+	*ppos += ret;
 
 	return ret;
 }
@@ -3299,7 +3299,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 		}
 	}
 
-	filp->f_pos += cnt;
+	*ppos += cnt;
 
 	/* If check pages failed, return ENOMEM */
 	if (tracing_disabled)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index ed17565..b6c12c6 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -69,6 +69,9 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter)
  * @s: trace sequence descriptor
  * @fmt: printf format string
  *
+ * It returns 0 if the trace oversizes the buffer's free
+ * space, 1 otherwise.
+ *
  * The tracer may use either sequence operations or its own
  * copy to user routines. To simplify formating of a trace
  * trace_seq_printf is used to store strings into a special
@@ -95,7 +98,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 
 	s->len += ret;
 
-	return len;
+	return 1;
 }
 EXPORT_SYMBOL_GPL(trace_seq_printf);
 

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-10-13 18:17 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-10-13 18:17 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt, Frédéric Weisbecker,
	Andrew Morton

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Li Zefan (1):
      tracing/filters: Fix memory leak when setting a filter

Steven Rostedt (1):
      tracing: fix trace_vprintk call


 kernel/trace/trace.c               |    2 +-
 kernel/trace/trace_events_filter.c |    3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4506826..c820b03 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1393,7 +1393,7 @@ int trace_array_vprintk(struct trace_array *tr,
 
 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 {
-	return trace_array_printk(&global_trace, ip, fmt, args);
+	return trace_array_vprintk(&global_trace, ip, fmt, args);
 }
 EXPORT_SYMBOL_GPL(trace_vprintk);
 
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 2324578..98a6cc5 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -933,8 +933,9 @@ static void postfix_clear(struct filter_parse_state *ps)
 
 	while (!list_empty(&ps->postfix)) {
 		elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
-		kfree(elt->operand);
 		list_del(&elt->list);
+		kfree(elt->operand);
+		kfree(elt);
 	}
 }
 

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-10-08 18:52 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-10-08 18:52 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt, Frédéric Weisbecker

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Frederic Weisbecker (2):
      tracing: Check total refcount before releasing bufs in profile_enable failure
      tracing: Use free_percpu instead of kfree

Hiroshi Shimamoto (1):
      trace: Fix missing assignment in trace_ctxwake_*

Steven Rostedt (3):
      tracing: fix transposed numbers of lock_depth and preempt_count
      ftrace: check for failure for all conversions
      tracing: user local buffer variable for trace branch tracer

Zhenwen Xu (1):
      tracing: fix warning on kernel/trace/trace_branch.c andtrace_hw_branches.c

jolsa@redhat.com (1):
      tracing: correct module boundaries for ftrace_release


 include/linux/ftrace.h             |    2 +-
 kernel/trace/ftrace.c              |   23 +++++++----------------
 kernel/trace/trace_branch.c        |    8 +++++---
 kernel/trace/trace_event_profile.c |   15 ++++++++++-----
 kernel/trace/trace_hw_branches.c   |    8 +++++---
 kernel/trace/trace_output.c        |   18 ++++++++++--------
 6 files changed, 38 insertions(+), 36 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index cd3d2ab..0b4f97d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -241,7 +241,7 @@ extern void ftrace_enable_daemon(void);
 # define ftrace_set_filter(buf, len, reset)	do { } while (0)
 # define ftrace_disable_daemon()		do { } while (0)
 # define ftrace_enable_daemon()			do { } while (0)
-static inline void ftrace_release(void *start, unsigned long size) { }
+static inline void ftrace_release_mod(struct module *mod) {}
 static inline int register_ftrace_command(struct ftrace_func_command *cmd)
 {
 	return -EINVAL;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 46592fe..f136fe5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1074,14 +1074,9 @@ static void ftrace_replace_code(int enable)
 		failed = __ftrace_replace_code(rec, enable);
 		if (failed) {
 			rec->flags |= FTRACE_FL_FAILED;
-			if ((system_state == SYSTEM_BOOTING) ||
-			    !core_kernel_text(rec->ip)) {
-				ftrace_free_rec(rec);
-				} else {
-				ftrace_bug(failed, rec->ip);
-					/* Stop processing */
-					return;
-				}
+			ftrace_bug(failed, rec->ip);
+			/* Stop processing */
+			return;
 		}
 	} while_for_each_ftrace_rec();
 }
@@ -2658,19 +2653,17 @@ static int ftrace_convert_nops(struct module *mod,
 }
 
 #ifdef CONFIG_MODULES
-void ftrace_release(void *start, void *end)
+void ftrace_release_mod(struct module *mod)
 {
 	struct dyn_ftrace *rec;
 	struct ftrace_page *pg;
-	unsigned long s = (unsigned long)start;
-	unsigned long e = (unsigned long)end;
 
-	if (ftrace_disabled || !start || start == end)
+	if (ftrace_disabled)
 		return;
 
 	mutex_lock(&ftrace_lock);
 	do_for_each_ftrace_rec(pg, rec) {
-		if ((rec->ip >= s) && (rec->ip < e)) {
+		if (within_module_core(rec->ip, mod)) {
 			/*
 			 * rec->ip is changed in ftrace_free_rec()
 			 * It should not between s and e if record was freed.
@@ -2702,9 +2695,7 @@ static int ftrace_module_notify(struct notifier_block *self,
 				   mod->num_ftrace_callsites);
 		break;
 	case MODULE_STATE_GOING:
-		ftrace_release(mod->ftrace_callsites,
-			       mod->ftrace_callsites +
-			       mod->num_ftrace_callsites);
+		ftrace_release_mod(mod);
 		break;
 	}
 
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 7a7a9fd..4a194f0 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -34,6 +34,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 	struct trace_array *tr = branch_tracer;
 	struct ring_buffer_event *event;
 	struct trace_branch *entry;
+	struct ring_buffer *buffer;
 	unsigned long flags;
 	int cpu, pc;
 	const char *p;
@@ -54,7 +55,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 		goto out;
 
 	pc = preempt_count();
-	event = trace_buffer_lock_reserve(tr, TRACE_BRANCH,
+	buffer = tr->buffer;
+	event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH,
 					  sizeof(*entry), flags, pc);
 	if (!event)
 		goto out;
@@ -74,8 +76,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 	entry->line = f->line;
 	entry->correct = val == expect;
 
-	if (!filter_check_discard(call, entry, tr->buffer, event))
-		ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, buffer, event))
+		ring_buffer_unlock_commit(buffer, event);
 
  out:
 	atomic_dec(&tr->data[cpu]->disabled);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index dd44b87..8d5c171 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -31,7 +31,7 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
 	if (atomic_inc_return(&event->profile_count))
 		return 0;
 
-	if (!total_profile_count++) {
+	if (!total_profile_count) {
 		buf = (char *)alloc_percpu(profile_buf_t);
 		if (!buf)
 			goto fail_buf;
@@ -46,14 +46,19 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
 	}
 
 	ret = event->profile_enable();
-	if (!ret)
+	if (!ret) {
+		total_profile_count++;
 		return 0;
+	}
 
-	kfree(trace_profile_buf_nmi);
 fail_buf_nmi:
-	kfree(trace_profile_buf);
+	if (!total_profile_count) {
+		free_percpu(trace_profile_buf_nmi);
+		free_percpu(trace_profile_buf);
+		trace_profile_buf_nmi = NULL;
+		trace_profile_buf = NULL;
+	}
 fail_buf:
-	total_profile_count--;
 	atomic_dec(&event->profile_count);
 
 	return ret;
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 23b6385..69543a9 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -165,6 +165,7 @@ void trace_hw_branch(u64 from, u64 to)
 	struct ftrace_event_call *call = &event_hw_branch;
 	struct trace_array *tr = hw_branch_trace;
 	struct ring_buffer_event *event;
+	struct ring_buffer *buf;
 	struct hw_branch_entry *entry;
 	unsigned long irq1;
 	int cpu;
@@ -180,7 +181,8 @@ void trace_hw_branch(u64 from, u64 to)
 	if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
 		goto out;
 
-	event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES,
+	buf = tr->buffer;
+	event = trace_buffer_lock_reserve(buf, TRACE_HW_BRANCHES,
 					  sizeof(*entry), 0, 0);
 	if (!event)
 		goto out;
@@ -189,8 +191,8 @@ void trace_hw_branch(u64 from, u64 to)
 	entry->ent.type = TRACE_HW_BRANCHES;
 	entry->from = from;
 	entry->to   = to;
-	if (!filter_check_discard(call, entry, tr->buffer, event))
-		trace_buffer_unlock_commit(tr, event, 0, 0);
+	if (!filter_check_discard(call, entry, buf, event))
+		trace_buffer_unlock_commit(buf, event, 0, 0);
 
  out:
 	atomic_dec(&tr->data[cpu]->disabled);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index f572f44..ed17565 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -486,16 +486,18 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
 				hardirq ? 'h' : softirq ? 's' : '.'))
 		return 0;
 
-	if (entry->lock_depth < 0)
-		ret = trace_seq_putc(s, '.');
+	if (entry->preempt_count)
+		ret = trace_seq_printf(s, "%x", entry->preempt_count);
 	else
-		ret = trace_seq_printf(s, "%d", entry->lock_depth);
+		ret = trace_seq_putc(s, '.');
+
 	if (!ret)
 		return 0;
 
-	if (entry->preempt_count)
-		return trace_seq_printf(s, "%x", entry->preempt_count);
-	return trace_seq_putc(s, '.');
+	if (entry->lock_depth < 0)
+		return trace_seq_putc(s, '.');
+
+	return trace_seq_printf(s, "%d", entry->lock_depth);
 }
 
 static int
@@ -883,7 +885,7 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
 	trace_assign_type(field, iter->ent);
 
 	if (!S)
-		task_state_char(field->prev_state);
+		S = task_state_char(field->prev_state);
 	T = task_state_char(field->next_state);
 	if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
 			      field->prev_pid,
@@ -918,7 +920,7 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
 	trace_assign_type(field, iter->ent);
 
 	if (!S)
-		task_state_char(field->prev_state);
+		S = task_state_char(field->prev_state);
 	T = task_state_char(field->next_state);
 
 	SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-10-02 12:37 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-10-02 12:37 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt, Frédéric Weisbecker

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Matt Fleming (1):
      tracing: Fix infinite recursion in ftrace_update_pid_func()

Paul Mundt (1):
      kmemtrace: Fix up tracer registration


 kernel/trace/ftrace.c    |    4 ++++
 kernel/trace/kmemtrace.c |    2 +-
 2 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 46592fe..3724756 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -225,7 +225,11 @@ static void ftrace_update_pid_func(void)
 	if (ftrace_trace_function == ftrace_stub)
 		return;
 
+#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	func = ftrace_trace_function;
+#else
+	func = __ftrace_trace_function;
+#endif
 
 	if (ftrace_pid_trace) {
 		set_ftrace_pid_function(func);
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 81b1645..a91da69 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -501,7 +501,7 @@ static int __init init_kmem_tracer(void)
 		return 1;
 	}
 
-	if (!register_tracer(&kmem_tracer)) {
+	if (register_tracer(&kmem_tracer) != 0) {
 		pr_warning("Warning: could not register the kmem tracer\n");
 		return 1;
 	}

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: [GIT PULL] tracing fixes
  2009-09-26 16:10     ` Christoph Hellwig
@ 2009-10-01 19:02       ` Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-10-01 19:02 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Linus Torvalds, Linux Kernel Mailing List, Steven Rostedt,
	Fr??d??ric Weisbecker


* Christoph Hellwig <hch@lst.de> wrote:

> On Mon, Sep 21, 2009 at 06:22:11PM +0200, Ingo Molnar wrote:
> > Hm, yes, you are right, we missed that. I noticed the modpost 
> > dependency:
> > 
> >   http://marc.info/?l=linux-kernel&m=125326944223679&w=2
> > 
> > but thought that it's safe and didnt notice that dangling 
> > module_layout() reference and the build warning it generates in the 
> > kernel.
> > 
> > I _think_ the patch below should do the trick - module_layout() appears 
> > to be an artificial symbol with 'significant' symbols listed in its 
> > argument list so that it gets a proper signature. Removing the marker 
> > symbol should thus be the solution, but i'm not 100% sure. Thoughts?
> > 
> > 	Ingo
> 
> Looks good to me, Linus?
> 
> > diff --git a/kernel/module.c b/kernel/module.c
> > index b6ee424..392eb3d 100644
> > --- a/kernel/module.c
> > +++ b/kernel/module.c
> > @@ -2947,7 +2947,6 @@ void module_layout(struct module *mod,
> >  		   struct modversion_info *ver,
> >  		   struct kernel_param *kp,
> >  		   struct kernel_symbol *ks,
> > -		   struct marker *marker,
> >  		   struct tracepoint *tp)
> >  {
> >  }
> ---end quoted text---

Yep, already pushed that to Linus some time ago:

  115e8a2: modules, tracing: Remove stale struct marker signature from module_layout()

	Ingo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [GIT PULL] tracing fixes
  2009-09-21 16:22   ` Ingo Molnar
@ 2009-09-26 16:10     ` Christoph Hellwig
  2009-10-01 19:02       ` Ingo Molnar
  0 siblings, 1 reply; 53+ messages in thread
From: Christoph Hellwig @ 2009-09-26 16:10 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Christoph Hellwig, Linux Kernel Mailing List,
	Steven Rostedt, Fr??d??ric Weisbecker

On Mon, Sep 21, 2009 at 06:22:11PM +0200, Ingo Molnar wrote:
> Hm, yes, you are right, we missed that. I noticed the modpost 
> dependency:
> 
>   http://marc.info/?l=linux-kernel&m=125326944223679&w=2
> 
> but thought that it's safe and didnt notice that dangling 
> module_layout() reference and the build warning it generates in the 
> kernel.
> 
> I _think_ the patch below should do the trick - module_layout() appears 
> to be an artificial symbol with 'significant' symbols listed in its 
> argument list so that it gets a proper signature. Removing the marker 
> symbol should thus be the solution, but i'm not 100% sure. Thoughts?
> 
> 	Ingo

Looks good to me, Linus?

> diff --git a/kernel/module.c b/kernel/module.c
> index b6ee424..392eb3d 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -2947,7 +2947,6 @@ void module_layout(struct module *mod,
>  		   struct modversion_info *ver,
>  		   struct kernel_param *kp,
>  		   struct kernel_symbol *ks,
> -		   struct marker *marker,
>  		   struct tracepoint *tp)
>  {
>  }
---end quoted text---

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-09-26 12:23 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-09-26 12:23 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt,
	=?unknown-8bit?B?RnLDqWTDqXJpYw==?= Weisbecker

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Anton Blanchard (1):
      tracing/workqueue: Use %pf in workqueue trace events

Ingo Molnar (1):
      modules, tracing: Remove stale struct marker signature from module_layout()

Li Hong (1):
      tracing: Fix a comment and a trivial format issue in tracepoint.h

Li Zefan (4):
      tracing: Fix off-by-one in trace_get_user()
      tracing: Check the return value of trace_get_user()
      tracing: Fix failure path in ftrace_graph_write()
      tracing: Fix failure path in ftrace_regex_open()


 include/linux/tracepoint.h       |    4 ++--
 include/trace/events/workqueue.h |    4 ++--
 kernel/module.c                  |    1 -
 kernel/trace/ftrace.c            |   23 +++++++++++++----------
 kernel/trace/trace.c             |    2 +-
 kernel/trace/trace_events.c      |    7 +++----
 6 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 63a3f7a..2aac8a8 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -4,7 +4,7 @@
 /*
  * Kernel Tracepoint API.
  *
- * See Documentation/tracepoint.txt.
+ * See Documentation/trace/tracepoints.txt.
  *
  * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
  *
@@ -36,7 +36,7 @@ struct tracepoint {
 #ifndef DECLARE_TRACE
 
 #define TP_PROTO(args...)	args
-#define TP_ARGS(args...)		args
+#define TP_ARGS(args...)	args
 
 #ifdef CONFIG_TRACEPOINTS
 
diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
index fcfd9a1..e4612db 100644
--- a/include/trace/events/workqueue.h
+++ b/include/trace/events/workqueue.h
@@ -26,7 +26,7 @@ TRACE_EVENT(workqueue_insertion,
 		__entry->func		= work->func;
 	),
 
-	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+	TP_printk("thread=%s:%d func=%pf", __entry->thread_comm,
 		__entry->thread_pid, __entry->func)
 );
 
@@ -48,7 +48,7 @@ TRACE_EVENT(workqueue_execution,
 		__entry->func		= work->func;
 	),
 
-	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+	TP_printk("thread=%s:%d func=%pf", __entry->thread_comm,
 		__entry->thread_pid, __entry->func)
 );
 
diff --git a/kernel/module.c b/kernel/module.c
index b6ee424..392eb3d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2947,7 +2947,6 @@ void module_layout(struct module *mod,
 		   struct modversion_info *ver,
 		   struct kernel_param *kp,
 		   struct kernel_symbol *ks,
-		   struct marker *marker,
 		   struct tracepoint *tp)
 {
 }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c71e91b..e70af98 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1621,8 +1621,10 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
 		if (!ret) {
 			struct seq_file *m = file->private_data;
 			m->private = iter;
-		} else
+		} else {
+			trace_parser_put(&iter->parser);
 			kfree(iter);
+		}
 	} else
 		file->private_data = iter;
 	mutex_unlock(&ftrace_regex_lock);
@@ -2202,7 +2204,7 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
 	struct trace_parser *parser;
 	ssize_t ret, read;
 
-	if (!cnt || cnt < 0)
+	if (!cnt)
 		return 0;
 
 	mutex_lock(&ftrace_regex_lock);
@@ -2216,7 +2218,7 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
 	parser = &iter->parser;
 	read = trace_get_user(parser, ubuf, cnt, ppos);
 
-	if (trace_parser_loaded(parser) &&
+	if (read >= 0 && trace_parser_loaded(parser) &&
 	    !trace_parser_cont(parser)) {
 		ret = ftrace_process_regex(parser->buffer,
 					   parser->idx, enable);
@@ -2552,8 +2554,7 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
 		   size_t cnt, loff_t *ppos)
 {
 	struct trace_parser parser;
-	size_t read = 0;
-	ssize_t ret;
+	ssize_t read, ret;
 
 	if (!cnt || cnt < 0)
 		return 0;
@@ -2562,29 +2563,31 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
 
 	if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
 		ret = -EBUSY;
-		goto out;
+		goto out_unlock;
 	}
 
 	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
 		ret = -ENOMEM;
-		goto out;
+		goto out_unlock;
 	}
 
 	read = trace_get_user(&parser, ubuf, cnt, ppos);
 
-	if (trace_parser_loaded((&parser))) {
+	if (read >= 0 && trace_parser_loaded((&parser))) {
 		parser.buffer[parser.idx] = 0;
 
 		/* we allow only one expression at a time */
 		ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
 					parser.buffer);
 		if (ret)
-			goto out;
+			goto out_free;
 	}
 
 	ret = read;
- out:
+
+out_free:
 	trace_parser_put(&parser);
+out_unlock:
 	mutex_unlock(&graph_lock);
 
 	return ret;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a35925d..ae17453 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -415,7 +415,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
 
 	/* read the non-space input */
 	while (cnt && !isspace(ch)) {
-		if (parser->idx < parser->size)
+		if (parser->idx < parser->size - 1)
 			parser->buffer[parser->idx++] = ch;
 		else {
 			ret = -EINVAL;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 6f03c8a..d128f65 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -232,10 +232,9 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
 		   size_t cnt, loff_t *ppos)
 {
 	struct trace_parser parser;
-	size_t read = 0;
-	ssize_t ret;
+	ssize_t read, ret;
 
-	if (!cnt || cnt < 0)
+	if (!cnt)
 		return 0;
 
 	ret = tracing_update_buffers();
@@ -247,7 +246,7 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
 
 	read = trace_get_user(&parser, ubuf, cnt, ppos);
 
-	if (trace_parser_loaded((&parser))) {
+	if (read >= 0 && trace_parser_loaded((&parser))) {
 		int set = 1;
 
 		if (*parser.buffer == '!')

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: [GIT PULL] tracing fixes
  2009-09-21 16:08 ` Linus Torvalds
@ 2009-09-21 16:22   ` Ingo Molnar
  2009-09-26 16:10     ` Christoph Hellwig
  0 siblings, 1 reply; 53+ messages in thread
From: Ingo Molnar @ 2009-09-21 16:22 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Christoph Hellwig, Linux Kernel Mailing List, Steven Rostedt,
	Fr??d??ric Weisbecker


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Mon, 21 Sep 2009, Ingo Molnar wrote:
> > 
> > Christoph Hellwig (1):
> >       tracing: Remove markers
> 
> This seems to result in:
> 
> 	kernel/module.c:2951: warning: ?struct marker? declared inside parameter list
> 	kernel/module.c:2951: warning: its scope is only this definition or declaration, which is probably not what you want
> 
> Hmm? Incomplete removal?

Hm, yes, you are right, we missed that. I noticed the modpost 
dependency:

  http://marc.info/?l=linux-kernel&m=125326944223679&w=2

but thought that it's safe and didnt notice that dangling 
module_layout() reference and the build warning it generates in the 
kernel.

I _think_ the patch below should do the trick - module_layout() appears 
to be an artificial symbol with 'significant' symbols listed in its 
argument list so that it gets a proper signature. Removing the marker 
symbol should thus be the solution, but i'm not 100% sure. Thoughts?

	Ingo

diff --git a/kernel/module.c b/kernel/module.c
index b6ee424..392eb3d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2947,7 +2947,6 @@ void module_layout(struct module *mod,
 		   struct modversion_info *ver,
 		   struct kernel_param *kp,
 		   struct kernel_symbol *ks,
-		   struct marker *marker,
 		   struct tracepoint *tp)
 {
 }

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: [GIT PULL] tracing fixes
  2009-09-21 13:02 Ingo Molnar
@ 2009-09-21 16:08 ` Linus Torvalds
  2009-09-21 16:22   ` Ingo Molnar
  0 siblings, 1 reply; 53+ messages in thread
From: Linus Torvalds @ 2009-09-21 16:08 UTC (permalink / raw)
  To: Ingo Molnar, Christoph Hellwig
  Cc: Linux Kernel Mailing List, Steven Rostedt, Fr??d??ric Weisbecker



On Mon, 21 Sep 2009, Ingo Molnar wrote:
> 
> Christoph Hellwig (1):
>       tracing: Remove markers

This seems to result in:

	kernel/module.c:2951: warning: ‘struct marker’ declared inside parameter list
	kernel/module.c:2951: warning: its scope is only this definition or declaration, which is probably not what you want

Hmm? Incomplete removal?

		Linus

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-09-21 13:02 Ingo Molnar
  2009-09-21 16:08 ` Linus Torvalds
  0 siblings, 1 reply; 53+ messages in thread
From: Ingo Molnar @ 2009-09-21 13:02 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Steven Rostedt, Fr??d??ric Weisbecker

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Alexey Dobriyan (1):
      kernel/profile.c: Switch /proc/irq/prof_cpu_mask to seq_file

Christoph Hellwig (1):
      tracing: Remove markers

Frederic Weisbecker (2):
      tracing: Factorize the events profile accounting
      tracing: Allocate the ftrace event profile buffer dynamically

Li Zefan (3):
      function-graph: use ftrace_graph_funcs directly
      tracing: remove max_tracer_type_len
      tracing/events: use list_for_entry_continue

Peter Zijlstra (1):
      tracing: Export trace_profile_buf symbols


 Documentation/markers.txt                 |  104 ----
 arch/powerpc/platforms/cell/spufs/file.c  |    1 -
 arch/powerpc/platforms/cell/spufs/sched.c |    1 -
 include/linux/ftrace_event.h              |   10 +-
 include/linux/kvm_host.h                  |    1 -
 include/linux/marker.h                    |  221 -------
 include/linux/module.h                    |   11 -
 include/linux/syscalls.h                  |   24 +-
 include/trace/ftrace.h                    |  111 ++--
 init/Kconfig                              |    7 -
 kernel/Makefile                           |    1 -
 kernel/marker.c                           |  930 -----------------------------
 kernel/module.c                           |   18 -
 kernel/profile.c                          |   45 +-
 kernel/trace/ftrace.c                     |   23 +-
 kernel/trace/trace.c                      |   49 +-
 kernel/trace/trace_event_profile.c        |   82 +++-
 kernel/trace/trace_events.c               |   49 +-
 kernel/trace/trace_printk.c               |    1 -
 kernel/trace/trace_syscalls.c             |   97 +++-
 samples/Kconfig                           |    6 -
 samples/Makefile                          |    2 +-
 samples/markers/Makefile                  |    4 -
 samples/markers/marker-example.c          |   53 --
 samples/markers/probe-example.c           |   92 ---
 scripts/Makefile.modpost                  |   12 -
 26 files changed, 297 insertions(+), 1658 deletions(-)
 delete mode 100644 Documentation/markers.txt
 delete mode 100644 include/linux/marker.h
 delete mode 100644 kernel/marker.c
 delete mode 100644 samples/markers/Makefile
 delete mode 100644 samples/markers/marker-example.c
 delete mode 100644 samples/markers/probe-example.c

diff --git a/Documentation/markers.txt b/Documentation/markers.txt
deleted file mode 100644
index d2b3d0e..0000000
--- a/Documentation/markers.txt
+++ /dev/null
@@ -1,104 +0,0 @@
- 	             Using the Linux Kernel Markers
-
-			    Mathieu Desnoyers
-
-
-This document introduces Linux Kernel Markers and their use. It provides
-examples of how to insert markers in the kernel and connect probe functions to
-them and provides some examples of probe functions.
-
-
-* Purpose of markers
-
-A marker placed in code provides a hook to call a function (probe) that you can
-provide at runtime. A marker can be "on" (a probe is connected to it) or "off"
-(no probe is attached). When a marker is "off" it has no effect, except for
-adding a tiny time penalty (checking a condition for a branch) and space
-penalty (adding a few bytes for the function call at the end of the
-instrumented function and adds a data structure in a separate section).  When a
-marker is "on", the function you provide is called each time the marker is
-executed, in the execution context of the caller. When the function provided
-ends its execution, it returns to the caller (continuing from the marker site).
-
-You can put markers at important locations in the code. Markers are
-lightweight hooks that can pass an arbitrary number of parameters,
-described in a printk-like format string, to the attached probe function.
-
-They can be used for tracing and performance accounting.
-
-
-* Usage
-
-In order to use the macro trace_mark, you should include linux/marker.h.
-
-#include <linux/marker.h>
-
-And,
-
-trace_mark(subsystem_event, "myint %d mystring %s", someint, somestring);
-Where :
-- subsystem_event is an identifier unique to your event
-    - subsystem is the name of your subsystem.
-    - event is the name of the event to mark.
-- "myint %d mystring %s" is the formatted string for the serializer. "myint" and
-  "mystring" are repectively the field names associated with the first and
-  second parameter.
-- someint is an integer.
-- somestring is a char pointer.
-
-Connecting a function (probe) to a marker is done by providing a probe (function
-to call) for the specific marker through marker_probe_register() and can be
-activated by calling marker_arm(). Marker deactivation can be done by calling
-marker_disarm() as many times as marker_arm() has been called. Removing a probe
-is done through marker_probe_unregister(); it will disarm the probe.
-
-marker_synchronize_unregister() must be called between probe unregistration and
-the first occurrence of
-- the end of module exit function,
-  to make sure there is no caller left using the probe;
-- the free of any resource used by the probes,
-  to make sure the probes wont be accessing invalid data.
-This, and the fact that preemption is disabled around the probe call, make sure
-that probe removal and module unload are safe. See the "Probe example" section
-below for a sample probe module.
-
-The marker mechanism supports inserting multiple instances of the same marker.
-Markers can be put in inline functions, inlined static functions, and
-unrolled loops as well as regular functions.
-
-The naming scheme "subsystem_event" is suggested here as a convention intended
-to limit collisions. Marker names are global to the kernel: they are considered
-as being the same whether they are in the core kernel image or in modules.
-Conflicting format strings for markers with the same name will cause the markers
-to be detected to have a different format string not to be armed and will output
-a printk warning which identifies the inconsistency:
-
-"Format mismatch for probe probe_name (format), marker (format)"
-
-Another way to use markers is to simply define the marker without generating any
-function call to actually call into the marker. This is useful in combination
-with tracepoint probes in a scheme like this :
-
-void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk);
-
-DEFINE_MARKER_TP(marker_eventname, tracepoint_name, probe_tracepoint_name,
-	"arg1 %u pid %d");
-
-notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk)
-{
-	struct marker *marker = &GET_MARKER(kernel_irq_entry);
-	/* write data to trace buffers ... */
-}
-
-* Probe / marker example
-
-See the example provided in samples/markers/src
-
-Compile them with your kernel.
-
-Run, as root :
-modprobe marker-example (insmod order is not important)
-modprobe probe-example
-cat /proc/marker-example (returns an expected error)
-rmmod marker-example probe-example
-dmesg
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index ab8aef9..8f079b8 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -29,7 +29,6 @@
 #include <linux/poll.h>
 #include <linux/ptrace.h>
 #include <linux/seq_file.h>
-#include <linux/marker.h>
 
 #include <asm/io.h>
 #include <asm/time.h>
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index bb5b77c..4678078 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -39,7 +39,6 @@
 #include <linux/pid_namespace.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <linux/marker.h>
 
 #include <asm/io.h>
 #include <asm/mmu_context.h>
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index bd099ba..4ec5e67 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -4,6 +4,7 @@
 #include <linux/ring_buffer.h>
 #include <linux/trace_seq.h>
 #include <linux/percpu.h>
+#include <linux/hardirq.h>
 
 struct trace_array;
 struct tracer;
@@ -130,10 +131,15 @@ struct ftrace_event_call {
 	void			*data;
 
 	atomic_t		profile_count;
-	int			(*profile_enable)(struct ftrace_event_call *);
-	void			(*profile_disable)(struct ftrace_event_call *);
+	int			(*profile_enable)(void);
+	void			(*profile_disable)(void);
 };
 
+#define FTRACE_MAX_PROFILE_SIZE	2048
+
+extern char			*trace_profile_buf;
+extern char			*trace_profile_buf_nmi;
+
 #define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4af5603..b7bbb5d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -15,7 +15,6 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/preempt.h>
-#include <linux/marker.h>
 #include <linux/msi.h>
 #include <asm/signal.h>
 
diff --git a/include/linux/marker.h b/include/linux/marker.h
deleted file mode 100644
index b85e74c..0000000
--- a/include/linux/marker.h
+++ /dev/null
@@ -1,221 +0,0 @@
-#ifndef _LINUX_MARKER_H
-#define _LINUX_MARKER_H
-
-/*
- * Code markup for dynamic and static tracing.
- *
- * See Documentation/marker.txt.
- *
- * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This file is released under the GPLv2.
- * See the file COPYING for more details.
- */
-
-#include <stdarg.h>
-#include <linux/types.h>
-
-struct module;
-struct marker;
-
-/**
- * marker_probe_func - Type of a marker probe function
- * @probe_private: probe private data
- * @call_private: call site private data
- * @fmt: format string
- * @args: variable argument list pointer. Use a pointer to overcome C's
- *        inability to pass this around as a pointer in a portable manner in
- *        the callee otherwise.
- *
- * Type of marker probe functions. They receive the mdata and need to parse the
- * format string to recover the variable argument list.
- */
-typedef void marker_probe_func(void *probe_private, void *call_private,
-		const char *fmt, va_list *args);
-
-struct marker_probe_closure {
-	marker_probe_func *func;	/* Callback */
-	void *probe_private;		/* Private probe data */
-};
-
-struct marker {
-	const char *name;	/* Marker name */
-	const char *format;	/* Marker format string, describing the
-				 * variable argument list.
-				 */
-	char state;		/* Marker state. */
-	char ptype;		/* probe type : 0 : single, 1 : multi */
-				/* Probe wrapper */
-	void (*call)(const struct marker *mdata, void *call_private, ...);
-	struct marker_probe_closure single;
-	struct marker_probe_closure *multi;
-	const char *tp_name;	/* Optional tracepoint name */
-	void *tp_cb;		/* Optional tracepoint callback */
-} __attribute__((aligned(8)));
-
-#ifdef CONFIG_MARKERS
-
-#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format)		\
-		static const char __mstrtab_##name[]			\
-		__attribute__((section("__markers_strings")))		\
-		= #name "\0" format;					\
-		static struct marker __mark_##name			\
-		__attribute__((section("__markers"), aligned(8))) =	\
-		{ __mstrtab_##name, &__mstrtab_##name[sizeof(#name)],	\
-		  0, 0, marker_probe_cb, { __mark_empty_function, NULL},\
-		  NULL, tp_name_str, tp_cb }
-
-#define DEFINE_MARKER(name, format)					\
-		_DEFINE_MARKER(name, NULL, NULL, format)
-
-#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format)			\
-		_DEFINE_MARKER(name, #tp_name, tp_cb, format)
-
-/*
- * Note : the empty asm volatile with read constraint is used here instead of a
- * "used" attribute to fix a gcc 4.1.x bug.
- * Make sure the alignment of the structure in the __markers section will
- * not add unwanted padding between the beginning of the section and the
- * structure. Force alignment to the same alignment as the section start.
- *
- * The "generic" argument controls which marker enabling mechanism must be used.
- * If generic is true, a variable read is used.
- * If generic is false, immediate values are used.
- */
-#define __trace_mark(generic, name, call_private, format, args...)	\
-	do {								\
-		DEFINE_MARKER(name, format);				\
-		__mark_check_format(format, ## args);			\
-		if (unlikely(__mark_##name.state)) {			\
-			(*__mark_##name.call)				\
-				(&__mark_##name, call_private, ## args);\
-		}							\
-	} while (0)
-
-#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \
-	do {								\
-		void __check_tp_type(void)				\
-		{							\
-			register_trace_##tp_name(tp_cb);		\
-		}							\
-		DEFINE_MARKER_TP(name, tp_name, tp_cb, format);		\
-		__mark_check_format(format, ## args);			\
-		(*__mark_##name.call)(&__mark_##name, call_private,	\
-					## args);			\
-	} while (0)
-
-extern void marker_update_probe_range(struct marker *begin,
-	struct marker *end);
-
-#define GET_MARKER(name)	(__mark_##name)
-
-#else /* !CONFIG_MARKERS */
-#define DEFINE_MARKER(name, tp_name, tp_cb, format)
-#define __trace_mark(generic, name, call_private, format, args...) \
-		__mark_check_format(format, ## args)
-#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \
-	do {								\
-		void __check_tp_type(void)				\
-		{							\
-			register_trace_##tp_name(tp_cb);		\
-		}							\
-		__mark_check_format(format, ## args);			\
-	} while (0)
-static inline void marker_update_probe_range(struct marker *begin,
-	struct marker *end)
-{ }
-#define GET_MARKER(name)
-#endif /* CONFIG_MARKERS */
-
-/**
- * trace_mark - Marker using code patching
- * @name: marker name, not quoted.
- * @format: format string
- * @args...: variable argument list
- *
- * Places a marker using optimized code patching technique (imv_read())
- * to be enabled when immediate values are present.
- */
-#define trace_mark(name, format, args...) \
-	__trace_mark(0, name, NULL, format, ## args)
-
-/**
- * _trace_mark - Marker using variable read
- * @name: marker name, not quoted.
- * @format: format string
- * @args...: variable argument list
- *
- * Places a marker using a standard memory read (_imv_read()) to be
- * enabled. Should be used for markers in code paths where instruction
- * modification based enabling is not welcome. (__init and __exit functions,
- * lockdep, some traps, printk).
- */
-#define _trace_mark(name, format, args...) \
-	__trace_mark(1, name, NULL, format, ## args)
-
-/**
- * trace_mark_tp - Marker in a tracepoint callback
- * @name: marker name, not quoted.
- * @tp_name: tracepoint name, not quoted.
- * @tp_cb: tracepoint callback. Should have an associated global symbol so it
- *         is not optimized away by the compiler (should not be static).
- * @format: format string
- * @args...: variable argument list
- *
- * Places a marker in a tracepoint callback.
- */
-#define trace_mark_tp(name, tp_name, tp_cb, format, args...)	\
-	__trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args)
-
-/**
- * MARK_NOARGS - Format string for a marker with no argument.
- */
-#define MARK_NOARGS " "
-
-/* To be used for string format validity checking with gcc */
-static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...)
-{
-}
-
-#define __mark_check_format(format, args...)				\
-	do {								\
-		if (0)							\
-			___mark_check_format(format, ## args);		\
-	} while (0)
-
-extern marker_probe_func __mark_empty_function;
-
-extern void marker_probe_cb(const struct marker *mdata,
-	void *call_private, ...);
-
-/*
- * Connect a probe to a marker.
- * private data pointer must be a valid allocated memory address, or NULL.
- */
-extern int marker_probe_register(const char *name, const char *format,
-				marker_probe_func *probe, void *probe_private);
-
-/*
- * Returns the private data given to marker_probe_register.
- */
-extern int marker_probe_unregister(const char *name,
-	marker_probe_func *probe, void *probe_private);
-/*
- * Unregister a marker by providing the registered private data.
- */
-extern int marker_probe_unregister_private_data(marker_probe_func *probe,
-	void *probe_private);
-
-extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
-	int num);
-
-/*
- * marker_synchronize_unregister must be called between the last marker probe
- * unregistration and the first one of
- * - the end of module exit function
- * - the free of any resource used by the probes
- * to ensure the code and data are valid for any possibly running probes.
- */
-#define marker_synchronize_unregister() synchronize_sched()
-
-#endif
diff --git a/include/linux/module.h b/include/linux/module.h
index f8f92d0..1c755b2 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -15,7 +15,6 @@
 #include <linux/stringify.h>
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
-#include <linux/marker.h>
 #include <linux/tracepoint.h>
 
 #include <asm/local.h>
@@ -327,10 +326,6 @@ struct module
 	/* The command line arguments (may be mangled).  People like
 	   keeping pointers to this stuff */
 	char *args;
-#ifdef CONFIG_MARKERS
-	struct marker *markers;
-	unsigned int num_markers;
-#endif
 #ifdef CONFIG_TRACEPOINTS
 	struct tracepoint *tracepoints;
 	unsigned int num_tracepoints;
@@ -535,8 +530,6 @@ int unregister_module_notifier(struct notifier_block * nb);
 
 extern void print_modules(void);
 
-extern void module_update_markers(void);
-
 extern void module_update_tracepoints(void);
 extern int module_get_iter_tracepoints(struct tracepoint_iter *iter);
 
@@ -651,10 +644,6 @@ static inline void print_modules(void)
 {
 }
 
-static inline void module_update_markers(void)
-{
-}
-
 static inline void module_update_tracepoints(void)
 {
 }
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a8e3782..7d9803c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -100,33 +100,25 @@ struct perf_counter_attr;
 
 #ifdef CONFIG_EVENT_PROFILE
 #define TRACE_SYS_ENTER_PROFILE(sname)					       \
-static int prof_sysenter_enable_##sname(struct ftrace_event_call *event_call)  \
+static int prof_sysenter_enable_##sname(void)				       \
 {									       \
-	int ret = 0;							       \
-	if (!atomic_inc_return(&event_enter_##sname.profile_count))	       \
-		ret = reg_prof_syscall_enter("sys"#sname);		       \
-	return ret;							       \
+	return reg_prof_syscall_enter("sys"#sname);			       \
 }									       \
 									       \
-static void prof_sysenter_disable_##sname(struct ftrace_event_call *event_call)\
+static void prof_sysenter_disable_##sname(void)				       \
 {									       \
-	if (atomic_add_negative(-1, &event_enter_##sname.profile_count))       \
-		unreg_prof_syscall_enter("sys"#sname);			       \
+	unreg_prof_syscall_enter("sys"#sname);				       \
 }
 
 #define TRACE_SYS_EXIT_PROFILE(sname)					       \
-static int prof_sysexit_enable_##sname(struct ftrace_event_call *event_call)   \
+static int prof_sysexit_enable_##sname(void)				       \
 {									       \
-	int ret = 0;							       \
-	if (!atomic_inc_return(&event_exit_##sname.profile_count))	       \
-		ret = reg_prof_syscall_exit("sys"#sname);		       \
-	return ret;							       \
+	return reg_prof_syscall_exit("sys"#sname);			       \
 }									       \
 									       \
-static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \
+static void prof_sysexit_disable_##sname(void)				       \
 {                                                                              \
-	if (atomic_add_negative(-1, &event_exit_##sname.profile_count))	       \
-		unreg_prof_syscall_exit("sys"#sname);			       \
+	unreg_prof_syscall_exit("sys"#sname);				       \
 }
 
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)				       \
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 72a3b43..a0361cb 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -382,20 +382,14 @@ static inline int ftrace_get_offsets_##call(				\
  *
  * NOTE: The insertion profile callback (ftrace_profile_<call>) is defined later
  *
- * static int ftrace_profile_enable_<call>(struct ftrace_event_call *event_call)
+ * static int ftrace_profile_enable_<call>(void)
  * {
- * 	int ret = 0;
- *
- * 	if (!atomic_inc_return(&event_call->profile_count))
- * 		ret = register_trace_<call>(ftrace_profile_<call>);
- *
- * 	return ret;
+ * 	return register_trace_<call>(ftrace_profile_<call>);
  * }
  *
- * static void ftrace_profile_disable_<call>(struct ftrace_event_call *event_call)
+ * static void ftrace_profile_disable_<call>(void)
  * {
- * 	if (atomic_add_negative(-1, &event->call->profile_count))
- * 		unregister_trace_<call>(ftrace_profile_<call>);
+ * 	unregister_trace_<call>(ftrace_profile_<call>);
  * }
  *
  */
@@ -405,20 +399,14 @@ static inline int ftrace_get_offsets_##call(				\
 									\
 static void ftrace_profile_##call(proto);				\
 									\
-static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
+static int ftrace_profile_enable_##call(void)				\
 {									\
-	int ret = 0;							\
-									\
-	if (!atomic_inc_return(&event_call->profile_count))		\
-		ret = register_trace_##call(ftrace_profile_##call);	\
-									\
-	return ret;							\
+	return register_trace_##call(ftrace_profile_##call);		\
 }									\
 									\
-static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
+static void ftrace_profile_disable_##call(void)				\
 {									\
-	if (atomic_add_negative(-1, &event_call->profile_count))	\
-		unregister_trace_##call(ftrace_profile_##call);		\
+	unregister_trace_##call(ftrace_profile_##call);			\
 }
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
@@ -660,11 +648,12 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
  *	struct ftrace_raw_##call *entry;
  *	u64 __addr = 0, __count = 1;
  *	unsigned long irq_flags;
+ *	struct trace_entry *ent;
  *	int __entry_size;
  *	int __data_size;
+ *	int __cpu
  *	int pc;
  *
- *	local_save_flags(irq_flags);
  *	pc = preempt_count();
  *
  *	__data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
@@ -675,25 +664,34 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
  *			     sizeof(u64));
  *	__entry_size -= sizeof(u32);
  *
- *	do {
- *		char raw_data[__entry_size]; <- allocate our sample in the stack
- *		struct trace_entry *ent;
+ *	// Protect the non nmi buffer
+ *	// This also protects the rcu read side
+ *	local_irq_save(irq_flags);
+ *	__cpu = smp_processor_id();
+ *
+ *	if (in_nmi())
+ *		raw_data = rcu_dereference(trace_profile_buf_nmi);
+ *	else
+ *		raw_data = rcu_dereference(trace_profile_buf);
+ *
+ *	if (!raw_data)
+ *		goto end;
  *
- *		zero dead bytes from alignment to avoid stack leak to userspace:
+ *	raw_data = per_cpu_ptr(raw_data, __cpu);
  *
- *		*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
- *		entry = (struct ftrace_raw_<call> *)raw_data;
- *		ent = &entry->ent;
- *		tracing_generic_entry_update(ent, irq_flags, pc);
- *		ent->type = event_call->id;
+ *	//zero dead bytes from alignment to avoid stack leak to userspace:
+ *	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
+ *	entry = (struct ftrace_raw_<call> *)raw_data;
+ *	ent = &entry->ent;
+ *	tracing_generic_entry_update(ent, irq_flags, pc);
+ *	ent->type = event_call->id;
  *
- *		<tstruct> <- do some jobs with dynamic arrays
+ *	<tstruct> <- do some jobs with dynamic arrays
  *
- *		<assign>  <- affect our values
+ *	<assign>  <- affect our values
  *
- *		perf_tpcounter_event(event_call->id, __addr, __count, entry,
- *			     __entry_size);  <- submit them to perf counter
- *	} while (0);
+ *	perf_tpcounter_event(event_call->id, __addr, __count, entry,
+ *		     __entry_size);  <- submit them to perf counter
  *
  * }
  */
@@ -716,11 +714,13 @@ static void ftrace_profile_##call(proto)				\
 	struct ftrace_raw_##call *entry;				\
 	u64 __addr = 0, __count = 1;					\
 	unsigned long irq_flags;					\
+	struct trace_entry *ent;					\
 	int __entry_size;						\
 	int __data_size;						\
+	char *raw_data;							\
+	int __cpu;							\
 	int pc;								\
 									\
-	local_save_flags(irq_flags);					\
 	pc = preempt_count();						\
 									\
 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
@@ -728,23 +728,38 @@ static void ftrace_profile_##call(proto)				\
 			     sizeof(u64));				\
 	__entry_size -= sizeof(u32);					\
 									\
-	do {								\
-		char raw_data[__entry_size];				\
-		struct trace_entry *ent;				\
+	if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE,		\
+		      "profile buffer not large enough"))		\
+		return;							\
+									\
+	local_irq_save(irq_flags);					\
+	__cpu = smp_processor_id();					\
 									\
-		*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;	\
-		entry = (struct ftrace_raw_##call *)raw_data;		\
-		ent = &entry->ent;					\
-		tracing_generic_entry_update(ent, irq_flags, pc);	\
-		ent->type = event_call->id;				\
+	if (in_nmi())							\
+		raw_data = rcu_dereference(trace_profile_buf_nmi);		\
+	else								\
+		raw_data = rcu_dereference(trace_profile_buf);		\
 									\
-		tstruct							\
+	if (!raw_data)							\
+		goto end;						\
 									\
-		{ assign; }						\
+	raw_data = per_cpu_ptr(raw_data, __cpu);			\
 									\
-		perf_tpcounter_event(event_call->id, __addr, __count, entry,\
+	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;		\
+	entry = (struct ftrace_raw_##call *)raw_data;			\
+	ent = &entry->ent;						\
+	tracing_generic_entry_update(ent, irq_flags, pc);		\
+	ent->type = event_call->id;					\
+									\
+	tstruct								\
+									\
+	{ assign; }							\
+									\
+	perf_tpcounter_event(event_call->id, __addr, __count, entry,	\
 			     __entry_size);				\
-	} while (0);							\
+									\
+end:									\
+	local_irq_restore(irq_flags);					\
 									\
 }
 
diff --git a/init/Kconfig b/init/Kconfig
index 8e8b76d..4cc0fa1 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1054,13 +1054,6 @@ config PROFILING
 config TRACEPOINTS
 	bool
 
-config MARKERS
-	bool "Activate markers"
-	select TRACEPOINTS
-	help
-	  Place an empty function call at each marker site. Can be
-	  dynamically changed for a probe function.
-
 source "arch/Kconfig"
 
 config SLOW_WORK
diff --git a/kernel/Makefile b/kernel/Makefile
index 3d9c7e2..7c9b0a5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -87,7 +87,6 @@ obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
-obj-$(CONFIG_MARKERS) += marker.o
 obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace/
diff --git a/kernel/marker.c b/kernel/marker.c
deleted file mode 100644
index ea54f26..0000000
--- a/kernel/marker.c
+++ /dev/null
@@ -1,930 +0,0 @@
-/*
- * Copyright (C) 2007 Mathieu Desnoyers
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/types.h>
-#include <linux/jhash.h>
-#include <linux/list.h>
-#include <linux/rcupdate.h>
-#include <linux/marker.h>
-#include <linux/err.h>
-#include <linux/slab.h>
-
-extern struct marker __start___markers[];
-extern struct marker __stop___markers[];
-
-/* Set to 1 to enable marker debug output */
-static const int marker_debug;
-
-/*
- * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
- * and module markers and the hash table.
- */
-static DEFINE_MUTEX(markers_mutex);
-
-/*
- * Marker hash table, containing the active markers.
- * Protected by module_mutex.
- */
-#define MARKER_HASH_BITS 6
-#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
-static struct hlist_head marker_table[MARKER_TABLE_SIZE];
-
-/*
- * Note about RCU :
- * It is used to make sure every handler has finished using its private data
- * between two consecutive operation (add or remove) on a given marker.  It is
- * also used to delay the free of multiple probes array until a quiescent state
- * is reached.
- * marker entries modifications are protected by the markers_mutex.
- */
-struct marker_entry {
-	struct hlist_node hlist;
-	char *format;
-			/* Probe wrapper */
-	void (*call)(const struct marker *mdata, void *call_private, ...);
-	struct marker_probe_closure single;
-	struct marker_probe_closure *multi;
-	int refcount;	/* Number of times armed. 0 if disarmed. */
-	struct rcu_head rcu;
-	void *oldptr;
-	int rcu_pending;
-	unsigned char ptype:1;
-	unsigned char format_allocated:1;
-	char name[0];	/* Contains name'\0'format'\0' */
-};
-
-/**
- * __mark_empty_function - Empty probe callback
- * @probe_private: probe private data
- * @call_private: call site private data
- * @fmt: format string
- * @...: variable argument list
- *
- * Empty callback provided as a probe to the markers. By providing this to a
- * disabled marker, we make sure the  execution flow is always valid even
- * though the function pointer change and the marker enabling are two distinct
- * operations that modifies the execution flow of preemptible code.
- */
-notrace void __mark_empty_function(void *probe_private, void *call_private,
-	const char *fmt, va_list *args)
-{
-}
-EXPORT_SYMBOL_GPL(__mark_empty_function);
-
-/*
- * marker_probe_cb Callback that prepares the variable argument list for probes.
- * @mdata: pointer of type struct marker
- * @call_private: caller site private data
- * @...:  Variable argument list.
- *
- * Since we do not use "typical" pointer based RCU in the 1 argument case, we
- * need to put a full smp_rmb() in this branch. This is why we do not use
- * rcu_dereference() for the pointer read.
- */
-notrace void marker_probe_cb(const struct marker *mdata,
-		void *call_private, ...)
-{
-	va_list args;
-	char ptype;
-
-	/*
-	 * rcu_read_lock_sched does two things : disabling preemption to make
-	 * sure the teardown of the callbacks can be done correctly when they
-	 * are in modules and they insure RCU read coherency.
-	 */
-	rcu_read_lock_sched_notrace();
-	ptype = mdata->ptype;
-	if (likely(!ptype)) {
-		marker_probe_func *func;
-		/* Must read the ptype before ptr. They are not data dependant,
-		 * so we put an explicit smp_rmb() here. */
-		smp_rmb();
-		func = mdata->single.func;
-		/* Must read the ptr before private data. They are not data
-		 * dependant, so we put an explicit smp_rmb() here. */
-		smp_rmb();
-		va_start(args, call_private);
-		func(mdata->single.probe_private, call_private, mdata->format,
-			&args);
-		va_end(args);
-	} else {
-		struct marker_probe_closure *multi;
-		int i;
-		/*
-		 * Read mdata->ptype before mdata->multi.
-		 */
-		smp_rmb();
-		multi = mdata->multi;
-		/*
-		 * multi points to an array, therefore accessing the array
-		 * depends on reading multi. However, even in this case,
-		 * we must insure that the pointer is read _before_ the array
-		 * data. Same as rcu_dereference, but we need a full smp_rmb()
-		 * in the fast path, so put the explicit barrier here.
-		 */
-		smp_read_barrier_depends();
-		for (i = 0; multi[i].func; i++) {
-			va_start(args, call_private);
-			multi[i].func(multi[i].probe_private, call_private,
-				mdata->format, &args);
-			va_end(args);
-		}
-	}
-	rcu_read_unlock_sched_notrace();
-}
-EXPORT_SYMBOL_GPL(marker_probe_cb);
-
-/*
- * marker_probe_cb Callback that does not prepare the variable argument list.
- * @mdata: pointer of type struct marker
- * @call_private: caller site private data
- * @...:  Variable argument list.
- *
- * Should be connected to markers "MARK_NOARGS".
- */
-static notrace void marker_probe_cb_noarg(const struct marker *mdata,
-		void *call_private, ...)
-{
-	va_list args;	/* not initialized */
-	char ptype;
-
-	rcu_read_lock_sched_notrace();
-	ptype = mdata->ptype;
-	if (likely(!ptype)) {
-		marker_probe_func *func;
-		/* Must read the ptype before ptr. They are not data dependant,
-		 * so we put an explicit smp_rmb() here. */
-		smp_rmb();
-		func = mdata->single.func;
-		/* Must read the ptr before private data. They are not data
-		 * dependant, so we put an explicit smp_rmb() here. */
-		smp_rmb();
-		func(mdata->single.probe_private, call_private, mdata->format,
-			&args);
-	} else {
-		struct marker_probe_closure *multi;
-		int i;
-		/*
-		 * Read mdata->ptype before mdata->multi.
-		 */
-		smp_rmb();
-		multi = mdata->multi;
-		/*
-		 * multi points to an array, therefore accessing the array
-		 * depends on reading multi. However, even in this case,
-		 * we must insure that the pointer is read _before_ the array
-		 * data. Same as rcu_dereference, but we need a full smp_rmb()
-		 * in the fast path, so put the explicit barrier here.
-		 */
-		smp_read_barrier_depends();
-		for (i = 0; multi[i].func; i++)
-			multi[i].func(multi[i].probe_private, call_private,
-				mdata->format, &args);
-	}
-	rcu_read_unlock_sched_notrace();
-}
-
-static void free_old_closure(struct rcu_head *head)
-{
-	struct marker_entry *entry = container_of(head,
-		struct marker_entry, rcu);
-	kfree(entry->oldptr);
-	/* Make sure we free the data before setting the pending flag to 0 */
-	smp_wmb();
-	entry->rcu_pending = 0;
-}
-
-static void debug_print_probes(struct marker_entry *entry)
-{
-	int i;
-
-	if (!marker_debug)
-		return;
-
-	if (!entry->ptype) {
-		printk(KERN_DEBUG "Single probe : %p %p\n",
-			entry->single.func,
-			entry->single.probe_private);
-	} else {
-		for (i = 0; entry->multi[i].func; i++)
-			printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
-				entry->multi[i].func,
-				entry->multi[i].probe_private);
-	}
-}
-
-static struct marker_probe_closure *
-marker_entry_add_probe(struct marker_entry *entry,
-		marker_probe_func *probe, void *probe_private)
-{
-	int nr_probes = 0;
-	struct marker_probe_closure *old, *new;
-
-	WARN_ON(!probe);
-
-	debug_print_probes(entry);
-	old = entry->multi;
-	if (!entry->ptype) {
-		if (entry->single.func == probe &&
-				entry->single.probe_private == probe_private)
-			return ERR_PTR(-EBUSY);
-		if (entry->single.func == __mark_empty_function) {
-			/* 0 -> 1 probes */
-			entry->single.func = probe;
-			entry->single.probe_private = probe_private;
-			entry->refcount = 1;
-			entry->ptype = 0;
-			debug_print_probes(entry);
-			return NULL;
-		} else {
-			/* 1 -> 2 probes */
-			nr_probes = 1;
-			old = NULL;
-		}
-	} else {
-		/* (N -> N+1), (N != 0, 1) probes */
-		for (nr_probes = 0; old[nr_probes].func; nr_probes++)
-			if (old[nr_probes].func == probe
-					&& old[nr_probes].probe_private
-						== probe_private)
-				return ERR_PTR(-EBUSY);
-	}
-	/* + 2 : one for new probe, one for NULL func */
-	new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure),
-			GFP_KERNEL);
-	if (new == NULL)
-		return ERR_PTR(-ENOMEM);
-	if (!old)
-		new[0] = entry->single;
-	else
-		memcpy(new, old,
-			nr_probes * sizeof(struct marker_probe_closure));
-	new[nr_probes].func = probe;
-	new[nr_probes].probe_private = probe_private;
-	entry->refcount = nr_probes + 1;
-	entry->multi = new;
-	entry->ptype = 1;
-	debug_print_probes(entry);
-	return old;
-}
-
-static struct marker_probe_closure *
-marker_entry_remove_probe(struct marker_entry *entry,
-		marker_probe_func *probe, void *probe_private)
-{
-	int nr_probes = 0, nr_del = 0, i;
-	struct marker_probe_closure *old, *new;
-
-	old = entry->multi;
-
-	debug_print_probes(entry);
-	if (!entry->ptype) {
-		/* 0 -> N is an error */
-		WARN_ON(entry->single.func == __mark_empty_function);
-		/* 1 -> 0 probes */
-		WARN_ON(probe && entry->single.func != probe);
-		WARN_ON(entry->single.probe_private != probe_private);
-		entry->single.func = __mark_empty_function;
-		entry->refcount = 0;
-		entry->ptype = 0;
-		debug_print_probes(entry);
-		return NULL;
-	} else {
-		/* (N -> M), (N > 1, M >= 0) probes */
-		for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
-			if ((!probe || old[nr_probes].func == probe)
-					&& old[nr_probes].probe_private
-						== probe_private)
-				nr_del++;
-		}
-	}
-
-	if (nr_probes - nr_del == 0) {
-		/* N -> 0, (N > 1) */
-		entry->single.func = __mark_empty_function;
-		entry->refcount = 0;
-		entry->ptype = 0;
-	} else if (nr_probes - nr_del == 1) {
-		/* N -> 1, (N > 1) */
-		for (i = 0; old[i].func; i++)
-			if ((probe && old[i].func != probe) ||
-					old[i].probe_private != probe_private)
-				entry->single = old[i];
-		entry->refcount = 1;
-		entry->ptype = 0;
-	} else {
-		int j = 0;
-		/* N -> M, (N > 1, M > 1) */
-		/* + 1 for NULL */
-		new = kzalloc((nr_probes - nr_del + 1)
-			* sizeof(struct marker_probe_closure), GFP_KERNEL);
-		if (new == NULL)
-			return ERR_PTR(-ENOMEM);
-		for (i = 0; old[i].func; i++)
-			if ((probe && old[i].func != probe) ||
-					old[i].probe_private != probe_private)
-				new[j++] = old[i];
-		entry->refcount = nr_probes - nr_del;
-		entry->ptype = 1;
-		entry->multi = new;
-	}
-	debug_print_probes(entry);
-	return old;
-}
-
-/*
- * Get marker if the marker is present in the marker hash table.
- * Must be called with markers_mutex held.
- * Returns NULL if not present.
- */
-static struct marker_entry *get_marker(const char *name)
-{
-	struct hlist_head *head;
-	struct hlist_node *node;
-	struct marker_entry *e;
-	u32 hash = jhash(name, strlen(name), 0);
-
-	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
-	hlist_for_each_entry(e, node, head, hlist) {
-		if (!strcmp(name, e->name))
-			return e;
-	}
-	return NULL;
-}
-
-/*
- * Add the marker to the marker hash table. Must be called with markers_mutex
- * held.
- */
-static struct marker_entry *add_marker(const char *name, const char *format)
-{
-	struct hlist_head *head;
-	struct hlist_node *node;
-	struct marker_entry *e;
-	size_t name_len = strlen(name) + 1;
-	size_t format_len = 0;
-	u32 hash = jhash(name, name_len-1, 0);
-
-	if (format)
-		format_len = strlen(format) + 1;
-	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
-	hlist_for_each_entry(e, node, head, hlist) {
-		if (!strcmp(name, e->name)) {
-			printk(KERN_NOTICE
-				"Marker %s busy\n", name);
-			return ERR_PTR(-EBUSY);	/* Already there */
-		}
-	}
-	/*
-	 * Using kmalloc here to allocate a variable length element. Could
-	 * cause some memory fragmentation if overused.
-	 */
-	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
-			GFP_KERNEL);
-	if (!e)
-		return ERR_PTR(-ENOMEM);
-	memcpy(&e->name[0], name, name_len);
-	if (format) {
-		e->format = &e->name[name_len];
-		memcpy(e->format, format, format_len);
-		if (strcmp(e->format, MARK_NOARGS) == 0)
-			e->call = marker_probe_cb_noarg;
-		else
-			e->call = marker_probe_cb;
-		trace_mark(core_marker_format, "name %s format %s",
-				e->name, e->format);
-	} else {
-		e->format = NULL;
-		e->call = marker_probe_cb;
-	}
-	e->single.func = __mark_empty_function;
-	e->single.probe_private = NULL;
-	e->multi = NULL;
-	e->ptype = 0;
-	e->format_allocated = 0;
-	e->refcount = 0;
-	e->rcu_pending = 0;
-	hlist_add_head(&e->hlist, head);
-	return e;
-}
-
-/*
- * Remove the marker from the marker hash table. Must be called with mutex_lock
- * held.
- */
-static int remove_marker(const char *name)
-{
-	struct hlist_head *head;
-	struct hlist_node *node;
-	struct marker_entry *e;
-	int found = 0;
-	size_t len = strlen(name) + 1;
-	u32 hash = jhash(name, len-1, 0);
-
-	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
-	hlist_for_each_entry(e, node, head, hlist) {
-		if (!strcmp(name, e->name)) {
-			found = 1;
-			break;
-		}
-	}
-	if (!found)
-		return -ENOENT;
-	if (e->single.func != __mark_empty_function)
-		return -EBUSY;
-	hlist_del(&e->hlist);
-	if (e->format_allocated)
-		kfree(e->format);
-	/* Make sure the call_rcu has been executed */
-	if (e->rcu_pending)
-		rcu_barrier_sched();
-	kfree(e);
-	return 0;
-}
-
-/*
- * Set the mark_entry format to the format found in the element.
- */
-static int marker_set_format(struct marker_entry *entry, const char *format)
-{
-	entry->format = kstrdup(format, GFP_KERNEL);
-	if (!entry->format)
-		return -ENOMEM;
-	entry->format_allocated = 1;
-
-	trace_mark(core_marker_format, "name %s format %s",
-			entry->name, entry->format);
-	return 0;
-}
-
-/*
- * Sets the probe callback corresponding to one marker.
- */
-static int set_marker(struct marker_entry *entry, struct marker *elem,
-		int active)
-{
-	int ret = 0;
-	WARN_ON(strcmp(entry->name, elem->name) != 0);
-
-	if (entry->format) {
-		if (strcmp(entry->format, elem->format) != 0) {
-			printk(KERN_NOTICE
-				"Format mismatch for probe %s "
-				"(%s), marker (%s)\n",
-				entry->name,
-				entry->format,
-				elem->format);
-			return -EPERM;
-		}
-	} else {
-		ret = marker_set_format(entry, elem->format);
-		if (ret)
-			return ret;
-	}
-
-	/*
-	 * probe_cb setup (statically known) is done here. It is
-	 * asynchronous with the rest of execution, therefore we only
-	 * pass from a "safe" callback (with argument) to an "unsafe"
-	 * callback (does not set arguments).
-	 */
-	elem->call = entry->call;
-	/*
-	 * Sanity check :
-	 * We only update the single probe private data when the ptr is
-	 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
-	 */
-	WARN_ON(elem->single.func != __mark_empty_function
-		&& elem->single.probe_private != entry->single.probe_private
-		&& !elem->ptype);
-	elem->single.probe_private = entry->single.probe_private;
-	/*
-	 * Make sure the private data is valid when we update the
-	 * single probe ptr.
-	 */
-	smp_wmb();
-	elem->single.func = entry->single.func;
-	/*
-	 * We also make sure that the new probe callbacks array is consistent
-	 * before setting a pointer to it.
-	 */
-	rcu_assign_pointer(elem->multi, entry->multi);
-	/*
-	 * Update the function or multi probe array pointer before setting the
-	 * ptype.
-	 */
-	smp_wmb();
-	elem->ptype = entry->ptype;
-
-	if (elem->tp_name && (active ^ elem->state)) {
-		WARN_ON(!elem->tp_cb);
-		/*
-		 * It is ok to directly call the probe registration because type
-		 * checking has been done in the __trace_mark_tp() macro.
-		 */
-
-		if (active) {
-			/*
-			 * try_module_get should always succeed because we hold
-			 * lock_module() to get the tp_cb address.
-			 */
-			ret = try_module_get(__module_text_address(
-				(unsigned long)elem->tp_cb));
-			BUG_ON(!ret);
-			ret = tracepoint_probe_register_noupdate(
-				elem->tp_name,
-				elem->tp_cb);
-		} else {
-			ret = tracepoint_probe_unregister_noupdate(
-				elem->tp_name,
-				elem->tp_cb);
-			/*
-			 * tracepoint_probe_update_all() must be called
-			 * before the module containing tp_cb is unloaded.
-			 */
-			module_put(__module_text_address(
-				(unsigned long)elem->tp_cb));
-		}
-	}
-	elem->state = active;
-
-	return ret;
-}
-
-/*
- * Disable a marker and its probe callback.
- * Note: only waiting an RCU period after setting elem->call to the empty
- * function insures that the original callback is not used anymore. This insured
- * by rcu_read_lock_sched around the call site.
- */
-static void disable_marker(struct marker *elem)
-{
-	int ret;
-
-	/* leave "call" as is. It is known statically. */
-	if (elem->tp_name && elem->state) {
-		WARN_ON(!elem->tp_cb);
-		/*
-		 * It is ok to directly call the probe registration because type
-		 * checking has been done in the __trace_mark_tp() macro.
-		 */
-		ret = tracepoint_probe_unregister_noupdate(elem->tp_name,
-			elem->tp_cb);
-		WARN_ON(ret);
-		/*
-		 * tracepoint_probe_update_all() must be called
-		 * before the module containing tp_cb is unloaded.
-		 */
-		module_put(__module_text_address((unsigned long)elem->tp_cb));
-	}
-	elem->state = 0;
-	elem->single.func = __mark_empty_function;
-	/* Update the function before setting the ptype */
-	smp_wmb();
-	elem->ptype = 0;	/* single probe */
-	/*
-	 * Leave the private data and id there, because removal is racy and
-	 * should be done only after an RCU period. These are never used until
-	 * the next initialization anyway.
-	 */
-}
-
-/**
- * marker_update_probe_range - Update a probe range
- * @begin: beginning of the range
- * @end: end of the range
- *
- * Updates the probe callback corresponding to a range of markers.
- */
-void marker_update_probe_range(struct marker *begin,
-	struct marker *end)
-{
-	struct marker *iter;
-	struct marker_entry *mark_entry;
-
-	mutex_lock(&markers_mutex);
-	for (iter = begin; iter < end; iter++) {
-		mark_entry = get_marker(iter->name);
-		if (mark_entry) {
-			set_marker(mark_entry, iter, !!mark_entry->refcount);
-			/*
-			 * ignore error, continue
-			 */
-		} else {
-			disable_marker(iter);
-		}
-	}
-	mutex_unlock(&markers_mutex);
-}
-
-/*
- * Update probes, removing the faulty probes.
- *
- * Internal callback only changed before the first probe is connected to it.
- * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
- * transitions.  All other transitions will leave the old private data valid.
- * This makes the non-atomicity of the callback/private data updates valid.
- *
- * "special case" updates :
- * 0 -> 1 callback
- * 1 -> 0 callback
- * 1 -> 2 callbacks
- * 2 -> 1 callbacks
- * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
- * Site effect : marker_set_format may delete the marker entry (creating a
- * replacement).
- */
-static void marker_update_probes(void)
-{
-	/* Core kernel markers */
-	marker_update_probe_range(__start___markers, __stop___markers);
-	/* Markers in modules. */
-	module_update_markers();
-	tracepoint_probe_update_all();
-}
-
-/**
- * marker_probe_register -  Connect a probe to a marker
- * @name: marker name
- * @format: format string
- * @probe: probe handler
- * @probe_private: probe private data
- *
- * private data must be a valid allocated memory address, or NULL.
- * Returns 0 if ok, error value on error.
- * The probe address must at least be aligned on the architecture pointer size.
- */
-int marker_probe_register(const char *name, const char *format,
-			marker_probe_func *probe, void *probe_private)
-{
-	struct marker_entry *entry;
-	int ret = 0;
-	struct marker_probe_closure *old;
-
-	mutex_lock(&markers_mutex);
-	entry = get_marker(name);
-	if (!entry) {
-		entry = add_marker(name, format);
-		if (IS_ERR(entry))
-			ret = PTR_ERR(entry);
-	} else if (format) {
-		if (!entry->format)
-			ret = marker_set_format(entry, format);
-		else if (strcmp(entry->format, format))
-			ret = -EPERM;
-	}
-	if (ret)
-		goto end;
-
-	/*
-	 * If we detect that a call_rcu is pending for this marker,
-	 * make sure it's executed now.
-	 */
-	if (entry->rcu_pending)
-		rcu_barrier_sched();
-	old = marker_entry_add_probe(entry, probe, probe_private);
-	if (IS_ERR(old)) {
-		ret = PTR_ERR(old);
-		goto end;
-	}
-	mutex_unlock(&markers_mutex);
-	marker_update_probes();
-	mutex_lock(&markers_mutex);
-	entry = get_marker(name);
-	if (!entry)
-		goto end;
-	if (entry->rcu_pending)
-		rcu_barrier_sched();
-	entry->oldptr = old;
-	entry->rcu_pending = 1;
-	/* write rcu_pending before calling the RCU callback */
-	smp_wmb();
-	call_rcu_sched(&entry->rcu, free_old_closure);
-end:
-	mutex_unlock(&markers_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(marker_probe_register);
-
-/**
- * marker_probe_unregister -  Disconnect a probe from a marker
- * @name: marker name
- * @probe: probe function pointer
- * @probe_private: probe private data
- *
- * Returns the private data given to marker_probe_register, or an ERR_PTR().
- * We do not need to call a synchronize_sched to make sure the probes have
- * finished running before doing a module unload, because the module unload
- * itself uses stop_machine(), which insures that every preempt disabled section
- * have finished.
- */
-int marker_probe_unregister(const char *name,
-	marker_probe_func *probe, void *probe_private)
-{
-	struct marker_entry *entry;
-	struct marker_probe_closure *old;
-	int ret = -ENOENT;
-
-	mutex_lock(&markers_mutex);
-	entry = get_marker(name);
-	if (!entry)
-		goto end;
-	if (entry->rcu_pending)
-		rcu_barrier_sched();
-	old = marker_entry_remove_probe(entry, probe, probe_private);
-	mutex_unlock(&markers_mutex);
-	marker_update_probes();
-	mutex_lock(&markers_mutex);
-	entry = get_marker(name);
-	if (!entry)
-		goto end;
-	if (entry->rcu_pending)
-		rcu_barrier_sched();
-	entry->oldptr = old;
-	entry->rcu_pending = 1;
-	/* write rcu_pending before calling the RCU callback */
-	smp_wmb();
-	call_rcu_sched(&entry->rcu, free_old_closure);
-	remove_marker(name);	/* Ignore busy error message */
-	ret = 0;
-end:
-	mutex_unlock(&markers_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(marker_probe_unregister);
-
-static struct marker_entry *
-get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
-{
-	struct marker_entry *entry;
-	unsigned int i;
-	struct hlist_head *head;
-	struct hlist_node *node;
-
-	for (i = 0; i < MARKER_TABLE_SIZE; i++) {
-		head = &marker_table[i];
-		hlist_for_each_entry(entry, node, head, hlist) {
-			if (!entry->ptype) {
-				if (entry->single.func == probe
-						&& entry->single.probe_private
-						== probe_private)
-					return entry;
-			} else {
-				struct marker_probe_closure *closure;
-				closure = entry->multi;
-				for (i = 0; closure[i].func; i++) {
-					if (closure[i].func == probe &&
-							closure[i].probe_private
-							== probe_private)
-						return entry;
-				}
-			}
-		}
-	}
-	return NULL;
-}
-
-/**
- * marker_probe_unregister_private_data -  Disconnect a probe from a marker
- * @probe: probe function
- * @probe_private: probe private data
- *
- * Unregister a probe by providing the registered private data.
- * Only removes the first marker found in hash table.
- * Return 0 on success or error value.
- * We do not need to call a synchronize_sched to make sure the probes have
- * finished running before doing a module unload, because the module unload
- * itself uses stop_machine(), which insures that every preempt disabled section
- * have finished.
- */
-int marker_probe_unregister_private_data(marker_probe_func *probe,
-		void *probe_private)
-{
-	struct marker_entry *entry;
-	int ret = 0;
-	struct marker_probe_closure *old;
-
-	mutex_lock(&markers_mutex);
-	entry = get_marker_from_private_data(probe, probe_private);
-	if (!entry) {
-		ret = -ENOENT;
-		goto end;
-	}
-	if (entry->rcu_pending)
-		rcu_barrier_sched();
-	old = marker_entry_remove_probe(entry, NULL, probe_private);
-	mutex_unlock(&markers_mutex);
-	marker_update_probes();
-	mutex_lock(&markers_mutex);
-	entry = get_marker_from_private_data(probe, probe_private);
-	if (!entry)
-		goto end;
-	if (entry->rcu_pending)
-		rcu_barrier_sched();
-	entry->oldptr = old;
-	entry->rcu_pending = 1;
-	/* write rcu_pending before calling the RCU callback */
-	smp_wmb();
-	call_rcu_sched(&entry->rcu, free_old_closure);
-	remove_marker(entry->name);	/* Ignore busy error message */
-end:
-	mutex_unlock(&markers_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
-
-/**
- * marker_get_private_data - Get a marker's probe private data
- * @name: marker name
- * @probe: probe to match
- * @num: get the nth matching probe's private data
- *
- * Returns the nth private data pointer (starting from 0) matching, or an
- * ERR_PTR.
- * Returns the private data pointer, or an ERR_PTR.
- * The private data pointer should _only_ be dereferenced if the caller is the
- * owner of the data, or its content could vanish. This is mostly used to
- * confirm that a caller is the owner of a registered probe.
- */
-void *marker_get_private_data(const char *name, marker_probe_func *probe,
-		int num)
-{
-	struct hlist_head *head;
-	struct hlist_node *node;
-	struct marker_entry *e;
-	size_t name_len = strlen(name) + 1;
-	u32 hash = jhash(name, name_len-1, 0);
-	int i;
-
-	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
-	hlist_for_each_entry(e, node, head, hlist) {
-		if (!strcmp(name, e->name)) {
-			if (!e->ptype) {
-				if (num == 0 && e->single.func == probe)
-					return e->single.probe_private;
-			} else {
-				struct marker_probe_closure *closure;
-				int match = 0;
-				closure = e->multi;
-				for (i = 0; closure[i].func; i++) {
-					if (closure[i].func != probe)
-						continue;
-					if (match++ == num)
-						return closure[i].probe_private;
-				}
-			}
-			break;
-		}
-	}
-	return ERR_PTR(-ENOENT);
-}
-EXPORT_SYMBOL_GPL(marker_get_private_data);
-
-#ifdef CONFIG_MODULES
-
-int marker_module_notify(struct notifier_block *self,
-			 unsigned long val, void *data)
-{
-	struct module *mod = data;
-
-	switch (val) {
-	case MODULE_STATE_COMING:
-		marker_update_probe_range(mod->markers,
-			mod->markers + mod->num_markers);
-		break;
-	case MODULE_STATE_GOING:
-		marker_update_probe_range(mod->markers,
-			mod->markers + mod->num_markers);
-		break;
-	}
-	return 0;
-}
-
-struct notifier_block marker_module_nb = {
-	.notifier_call = marker_module_notify,
-	.priority = 0,
-};
-
-static int init_markers(void)
-{
-	return register_module_notifier(&marker_module_nb);
-}
-__initcall(init_markers);
-
-#endif /* CONFIG_MODULES */
diff --git a/kernel/module.c b/kernel/module.c
index 05ce49c..b6ee424 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2237,10 +2237,6 @@ static noinline struct module *load_module(void __user *umod,
 				  sizeof(*mod->ctors), &mod->num_ctors);
 #endif
 
-#ifdef CONFIG_MARKERS
-	mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers",
-				    sizeof(*mod->markers), &mod->num_markers);
-#endif
 #ifdef CONFIG_TRACEPOINTS
 	mod->tracepoints = section_objs(hdr, sechdrs, secstrings,
 					"__tracepoints",
@@ -2958,20 +2954,6 @@ void module_layout(struct module *mod,
 EXPORT_SYMBOL(module_layout);
 #endif
 
-#ifdef CONFIG_MARKERS
-void module_update_markers(void)
-{
-	struct module *mod;
-
-	mutex_lock(&module_mutex);
-	list_for_each_entry(mod, &modules, list)
-		if (!mod->taints)
-			marker_update_probe_range(mod->markers,
-				mod->markers + mod->num_markers);
-	mutex_unlock(&module_mutex);
-}
-#endif
-
 #ifdef CONFIG_TRACEPOINTS
 void module_update_tracepoints(void)
 {
diff --git a/kernel/profile.c b/kernel/profile.c
index 419250e..a55d3a3 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -442,48 +442,51 @@ void profile_tick(int type)
 
 #ifdef CONFIG_PROC_FS
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <asm/uaccess.h>
 
-static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
-			int count, int *eof, void *data)
+static int prof_cpu_mask_proc_show(struct seq_file *m, void *v)
 {
-	int len = cpumask_scnprintf(page, count, data);
-	if (count - len < 2)
-		return -EINVAL;
-	len += sprintf(page + len, "\n");
-	return len;
+	seq_cpumask(m, prof_cpu_mask);
+	seq_putc(m, '\n');
+	return 0;
 }
 
-static int prof_cpu_mask_write_proc(struct file *file,
-	const char __user *buffer,  unsigned long count, void *data)
+static int prof_cpu_mask_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, prof_cpu_mask_proc_show, NULL);
+}
+
+static ssize_t prof_cpu_mask_proc_write(struct file *file,
+	const char __user *buffer, size_t count, loff_t *pos)
 {
-	struct cpumask *mask = data;
-	unsigned long full_count = count, err;
 	cpumask_var_t new_value;
+	int err;
 
 	if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
 		return -ENOMEM;
 
 	err = cpumask_parse_user(buffer, count, new_value);
 	if (!err) {
-		cpumask_copy(mask, new_value);
-		err = full_count;
+		cpumask_copy(prof_cpu_mask, new_value);
+		err = count;
 	}
 	free_cpumask_var(new_value);
 	return err;
 }
 
+static const struct file_operations prof_cpu_mask_proc_fops = {
+	.open		= prof_cpu_mask_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= prof_cpu_mask_proc_write,
+};
+
 void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
 {
-	struct proc_dir_entry *entry;
-
 	/* create /proc/irq/prof_cpu_mask */
-	entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
-	if (!entry)
-		return;
-	entry->data = prof_cpu_mask;
-	entry->read_proc = prof_cpu_mask_read_proc;
-	entry->write_proc = prof_cpu_mask_write_proc;
+	proc_create("prof_cpu_mask", 0600, root_irq_dir, &prof_cpu_mask_proc_fops);
 }
 
 /*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index cc615f8..c71e91b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2414,11 +2414,9 @@ unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
 static void *
 __g_next(struct seq_file *m, loff_t *pos)
 {
-	unsigned long *array = m->private;
-
 	if (*pos >= ftrace_graph_count)
 		return NULL;
-	return &array[*pos];
+	return &ftrace_graph_funcs[*pos];
 }
 
 static void *
@@ -2482,16 +2480,10 @@ ftrace_graph_open(struct inode *inode, struct file *file)
 		ftrace_graph_count = 0;
 		memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
 	}
+	mutex_unlock(&graph_lock);
 
-	if (file->f_mode & FMODE_READ) {
+	if (file->f_mode & FMODE_READ)
 		ret = seq_open(file, &ftrace_graph_seq_ops);
-		if (!ret) {
-			struct seq_file *m = file->private_data;
-			m->private = ftrace_graph_funcs;
-		}
-	} else
-		file->private_data = ftrace_graph_funcs;
-	mutex_unlock(&graph_lock);
 
 	return ret;
 }
@@ -2560,7 +2552,6 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
 		   size_t cnt, loff_t *ppos)
 {
 	struct trace_parser parser;
-	unsigned long *array;
 	size_t read = 0;
 	ssize_t ret;
 
@@ -2574,12 +2565,6 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
 		goto out;
 	}
 
-	if (file->f_mode & FMODE_READ) {
-		struct seq_file *m = file->private_data;
-		array = m->private;
-	} else
-		array = file->private_data;
-
 	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
 		ret = -ENOMEM;
 		goto out;
@@ -2591,7 +2576,7 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
 		parser.buffer[parser.idx] = 0;
 
 		/* we allow only one expression at a time */
-		ret = ftrace_set_func(array, &ftrace_graph_count,
+		ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
 					parser.buffer);
 		if (ret)
 			goto out;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index fd52a19..8613080 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -125,13 +125,13 @@ int ftrace_dump_on_oops;
 
 static int tracing_set_tracer(const char *buf);
 
-#define BOOTUP_TRACER_SIZE		100
-static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata;
+#define MAX_TRACER_SIZE		100
+static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
 static char *default_bootup_tracer;
 
 static int __init set_ftrace(char *str)
 {
-	strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
+	strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
 	default_bootup_tracer = bootup_tracer_buf;
 	/* We are using ftrace early, expand it */
 	ring_buffer_expanded = 1;
@@ -242,13 +242,6 @@ static struct tracer		*trace_types __read_mostly;
 static struct tracer		*current_trace __read_mostly;
 
 /*
- * max_tracer_type_len is used to simplify the allocating of
- * buffers to read userspace tracer names. We keep track of
- * the longest tracer name registered.
- */
-static int			max_tracer_type_len;
-
-/*
  * trace_types_lock is used to protect the trace_types list.
  * This lock is also used to keep user access serialized.
  * Accesses from userspace will grab this lock while userspace
@@ -619,7 +612,6 @@ __releases(kernel_lock)
 __acquires(kernel_lock)
 {
 	struct tracer *t;
-	int len;
 	int ret = 0;
 
 	if (!type->name) {
@@ -627,6 +619,11 @@ __acquires(kernel_lock)
 		return -1;
 	}
 
+	if (strlen(type->name) > MAX_TRACER_SIZE) {
+		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
+		return -1;
+	}
+
 	/*
 	 * When this gets called we hold the BKL which means that
 	 * preemption is disabled. Various trace selftests however
@@ -641,7 +638,7 @@ __acquires(kernel_lock)
 	for (t = trace_types; t; t = t->next) {
 		if (strcmp(type->name, t->name) == 0) {
 			/* already found */
-			pr_info("Trace %s already registered\n",
+			pr_info("Tracer %s already registered\n",
 				type->name);
 			ret = -1;
 			goto out;
@@ -692,9 +689,6 @@ __acquires(kernel_lock)
 
 	type->next = trace_types;
 	trace_types = type;
-	len = strlen(type->name);
-	if (len > max_tracer_type_len)
-		max_tracer_type_len = len;
 
  out:
 	tracing_selftest_running = false;
@@ -703,7 +697,7 @@ __acquires(kernel_lock)
 	if (ret || !default_bootup_tracer)
 		goto out_unlock;
 
-	if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE))
+	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
 		goto out_unlock;
 
 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
@@ -725,14 +719,13 @@ __acquires(kernel_lock)
 void unregister_tracer(struct tracer *type)
 {
 	struct tracer **t;
-	int len;
 
 	mutex_lock(&trace_types_lock);
 	for (t = &trace_types; *t; t = &(*t)->next) {
 		if (*t == type)
 			goto found;
 	}
-	pr_info("Trace %s not registered\n", type->name);
+	pr_info("Tracer %s not registered\n", type->name);
 	goto out;
 
  found:
@@ -745,17 +738,7 @@ void unregister_tracer(struct tracer *type)
 			current_trace->stop(&global_trace);
 		current_trace = &nop_trace;
 	}
-
-	if (strlen(type->name) != max_tracer_type_len)
-		goto out;
-
-	max_tracer_type_len = 0;
-	for (t = &trace_types; *t; t = &(*t)->next) {
-		len = strlen((*t)->name);
-		if (len > max_tracer_type_len)
-			max_tracer_type_len = len;
-	}
- out:
+out:
 	mutex_unlock(&trace_types_lock);
 }
 
@@ -2604,7 +2587,7 @@ static ssize_t
 tracing_set_trace_read(struct file *filp, char __user *ubuf,
 		       size_t cnt, loff_t *ppos)
 {
-	char buf[max_tracer_type_len+2];
+	char buf[MAX_TRACER_SIZE+2];
 	int r;
 
 	mutex_lock(&trace_types_lock);
@@ -2754,15 +2737,15 @@ static ssize_t
 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
 			size_t cnt, loff_t *ppos)
 {
-	char buf[max_tracer_type_len+1];
+	char buf[MAX_TRACER_SIZE+1];
 	int i;
 	size_t ret;
 	int err;
 
 	ret = cnt;
 
-	if (cnt > max_tracer_type_len)
-		cnt = max_tracer_type_len;
+	if (cnt > MAX_TRACER_SIZE)
+		cnt = MAX_TRACER_SIZE;
 
 	if (copy_from_user(&buf, ubuf, cnt))
 		return -EFAULT;
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 55a25c9..dd44b87 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,6 +8,57 @@
 #include <linux/module.h>
 #include "trace.h"
 
+/*
+ * We can't use a size but a type in alloc_percpu()
+ * So let's create a dummy type that matches the desired size
+ */
+typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
+
+char		*trace_profile_buf;
+EXPORT_SYMBOL_GPL(trace_profile_buf);
+
+char		*trace_profile_buf_nmi;
+EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
+
+/* Count the events in use (per event id, not per instance) */
+static int	total_profile_count;
+
+static int ftrace_profile_enable_event(struct ftrace_event_call *event)
+{
+	char *buf;
+	int ret = -ENOMEM;
+
+	if (atomic_inc_return(&event->profile_count))
+		return 0;
+
+	if (!total_profile_count++) {
+		buf = (char *)alloc_percpu(profile_buf_t);
+		if (!buf)
+			goto fail_buf;
+
+		rcu_assign_pointer(trace_profile_buf, buf);
+
+		buf = (char *)alloc_percpu(profile_buf_t);
+		if (!buf)
+			goto fail_buf_nmi;
+
+		rcu_assign_pointer(trace_profile_buf_nmi, buf);
+	}
+
+	ret = event->profile_enable();
+	if (!ret)
+		return 0;
+
+	kfree(trace_profile_buf_nmi);
+fail_buf_nmi:
+	kfree(trace_profile_buf);
+fail_buf:
+	total_profile_count--;
+	atomic_dec(&event->profile_count);
+
+	return ret;
+}
+
 int ftrace_profile_enable(int event_id)
 {
 	struct ftrace_event_call *event;
@@ -17,7 +68,7 @@ int ftrace_profile_enable(int event_id)
 	list_for_each_entry(event, &ftrace_events, list) {
 		if (event->id == event_id && event->profile_enable &&
 		    try_module_get(event->mod)) {
-			ret = event->profile_enable(event);
+			ret = ftrace_profile_enable_event(event);
 			break;
 		}
 	}
@@ -26,6 +77,33 @@ int ftrace_profile_enable(int event_id)
 	return ret;
 }
 
+static void ftrace_profile_disable_event(struct ftrace_event_call *event)
+{
+	char *buf, *nmi_buf;
+
+	if (!atomic_add_negative(-1, &event->profile_count))
+		return;
+
+	event->profile_disable();
+
+	if (!--total_profile_count) {
+		buf = trace_profile_buf;
+		rcu_assign_pointer(trace_profile_buf, NULL);
+
+		nmi_buf = trace_profile_buf_nmi;
+		rcu_assign_pointer(trace_profile_buf_nmi, NULL);
+
+		/*
+		 * Ensure every events in profiling have finished before
+		 * releasing the buffers
+		 */
+		synchronize_sched();
+
+		free_percpu(buf);
+		free_percpu(nmi_buf);
+	}
+}
+
 void ftrace_profile_disable(int event_id)
 {
 	struct ftrace_event_call *event;
@@ -33,7 +111,7 @@ void ftrace_profile_disable(int event_id)
 	mutex_lock(&event_mutex);
 	list_for_each_entry(event, &ftrace_events, list) {
 		if (event->id == event_id) {
-			event->profile_disable(event);
+			ftrace_profile_disable_event(event);
 			module_put(event->mod);
 			break;
 		}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 56c260b..6f03c8a 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -271,42 +271,32 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
 static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct list_head *list = m->private;
-	struct ftrace_event_call *call;
+	struct ftrace_event_call *call = v;
 
 	(*pos)++;
 
-	for (;;) {
-		if (list == &ftrace_events)
-			return NULL;
-
-		call = list_entry(list, struct ftrace_event_call, list);
-
+	list_for_each_entry_continue(call, &ftrace_events, list) {
 		/*
 		 * The ftrace subsystem is for showing formats only.
 		 * They can not be enabled or disabled via the event files.
 		 */
 		if (call->regfunc)
-			break;
-
-		list = list->next;
+			return call;
 	}
 
-	m->private = list->next;
-
-	return call;
+	return NULL;
 }
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
-	struct ftrace_event_call *call = NULL;
+	struct ftrace_event_call *call;
 	loff_t l;
 
 	mutex_lock(&event_mutex);
 
-	m->private = ftrace_events.next;
+	call = list_entry(&ftrace_events, struct ftrace_event_call, list);
 	for (l = 0; l <= *pos; ) {
-		call = t_next(m, NULL, &l);
+		call = t_next(m, call, &l);
 		if (!call)
 			break;
 	}
@@ -316,37 +306,28 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 static void *
 s_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct list_head *list = m->private;
-	struct ftrace_event_call *call;
+	struct ftrace_event_call *call = v;
 
 	(*pos)++;
 
- retry:
-	if (list == &ftrace_events)
-		return NULL;
-
-	call = list_entry(list, struct ftrace_event_call, list);
-
-	if (!call->enabled) {
-		list = list->next;
-		goto retry;
+	list_for_each_entry_continue(call, &ftrace_events, list) {
+		if (call->enabled)
+			return call;
 	}
 
-	m->private = list->next;
-
-	return call;
+	return NULL;
 }
 
 static void *s_start(struct seq_file *m, loff_t *pos)
 {
-	struct ftrace_event_call *call = NULL;
+	struct ftrace_event_call *call;
 	loff_t l;
 
 	mutex_lock(&event_mutex);
 
-	m->private = ftrace_events.next;
+	call = list_entry(&ftrace_events, struct ftrace_event_call, list);
 	for (l = 0; l <= *pos; ) {
-		call = s_next(m, NULL, &l);
+		call = s_next(m, call, &l);
 		if (!call)
 			break;
 	}
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 687699d..2547d88 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -11,7 +11,6 @@
 #include <linux/ftrace.h>
 #include <linux/string.h>
 #include <linux/module.h>
-#include <linux/marker.h>
 #include <linux/mutex.h>
 #include <linux/ctype.h>
 #include <linux/list.h>
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8712ce3..7a3550c 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -384,10 +384,13 @@ static int sys_prof_refcount_exit;
 
 static void prof_syscall_enter(struct pt_regs *regs, long id)
 {
-	struct syscall_trace_enter *rec;
 	struct syscall_metadata *sys_data;
+	struct syscall_trace_enter *rec;
+	unsigned long flags;
+	char *raw_data;
 	int syscall_nr;
 	int size;
+	int cpu;
 
 	syscall_nr = syscall_get_nr(current, regs);
 	if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -402,20 +405,38 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	size = ALIGN(size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
 
-	do {
-		char raw_data[size];
+	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+		      "profile buffer not large enough"))
+		return;
+
+	/* Protect the per cpu buffer, begin the rcu read side */
+	local_irq_save(flags);
 
-		/* zero the dead bytes from align to not leak stack to user */
-		*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+	cpu = smp_processor_id();
+
+	if (in_nmi())
+		raw_data = rcu_dereference(trace_profile_buf_nmi);
+	else
+		raw_data = rcu_dereference(trace_profile_buf);
+
+	if (!raw_data)
+		goto end;
 
-		rec = (struct syscall_trace_enter *) raw_data;
-		tracing_generic_entry_update(&rec->ent, 0, 0);
-		rec->ent.type = sys_data->enter_id;
-		rec->nr = syscall_nr;
-		syscall_get_arguments(current, regs, 0, sys_data->nb_args,
-				       (unsigned long *)&rec->args);
-		perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
-	} while(0);
+	raw_data = per_cpu_ptr(raw_data, cpu);
+
+	/* zero the dead bytes from align to not leak stack to user */
+	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+	rec = (struct syscall_trace_enter *) raw_data;
+	tracing_generic_entry_update(&rec->ent, 0, 0);
+	rec->ent.type = sys_data->enter_id;
+	rec->nr = syscall_nr;
+	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
+			       (unsigned long *)&rec->args);
+	perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
+
+end:
+	local_irq_restore(flags);
 }
 
 int reg_prof_syscall_enter(char *name)
@@ -460,8 +481,12 @@ void unreg_prof_syscall_enter(char *name)
 static void prof_syscall_exit(struct pt_regs *regs, long ret)
 {
 	struct syscall_metadata *sys_data;
-	struct syscall_trace_exit rec;
+	struct syscall_trace_exit *rec;
+	unsigned long flags;
 	int syscall_nr;
+	char *raw_data;
+	int size;
+	int cpu;
 
 	syscall_nr = syscall_get_nr(current, regs);
 	if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -471,12 +496,46 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	if (!sys_data)
 		return;
 
-	tracing_generic_entry_update(&rec.ent, 0, 0);
-	rec.ent.type = sys_data->exit_id;
-	rec.nr = syscall_nr;
-	rec.ret = syscall_get_return_value(current, regs);
+	/* We can probably do that at build time */
+	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
+	size -= sizeof(u32);
 
-	perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
+	/*
+	 * Impossible, but be paranoid with the future
+	 * How to put this check outside runtime?
+	 */
+	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+		"exit event has grown above profile buffer size"))
+		return;
+
+	/* Protect the per cpu buffer, begin the rcu read side */
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+
+	if (in_nmi())
+		raw_data = rcu_dereference(trace_profile_buf_nmi);
+	else
+		raw_data = rcu_dereference(trace_profile_buf);
+
+	if (!raw_data)
+		goto end;
+
+	raw_data = per_cpu_ptr(raw_data, cpu);
+
+	/* zero the dead bytes from align to not leak stack to user */
+	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+	rec = (struct syscall_trace_exit *)raw_data;
+
+	tracing_generic_entry_update(&rec->ent, 0, 0);
+	rec->ent.type = sys_data->exit_id;
+	rec->nr = syscall_nr;
+	rec->ret = syscall_get_return_value(current, regs);
+
+	perf_tpcounter_event(sys_data->exit_id, 0, 1, rec, size);
+
+end:
+	local_irq_restore(flags);
 }
 
 int reg_prof_syscall_exit(char *name)
diff --git a/samples/Kconfig b/samples/Kconfig
index 428b065..b92bde3 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -7,12 +7,6 @@ menuconfig SAMPLES
 
 if SAMPLES
 
-config SAMPLE_MARKERS
-	tristate "Build markers examples -- loadable modules only"
-	depends on MARKERS && m
-	help
-	  This build markers example modules.
-
 config SAMPLE_TRACEPOINTS
 	tristate "Build tracepoints examples -- loadable modules only"
 	depends on TRACEPOINTS && m
diff --git a/samples/Makefile b/samples/Makefile
index 13e4b47..43343a0 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,3 @@
 # Makefile for Linux samples code
 
-obj-$(CONFIG_SAMPLES)	+= markers/ kobject/ kprobes/ tracepoints/ trace_events/
+obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ tracepoints/ trace_events/
diff --git a/samples/markers/Makefile b/samples/markers/Makefile
deleted file mode 100644
index 6d72312..0000000
--- a/samples/markers/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-# builds the kprobes example kernel modules;
-# then to use one (as root):  insmod <module_name.ko>
-
-obj-$(CONFIG_SAMPLE_MARKERS) += probe-example.o marker-example.o
diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c
deleted file mode 100644
index e9cd9c0..0000000
--- a/samples/markers/marker-example.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/* marker-example.c
- *
- * Executes a marker when /proc/marker-example is opened.
- *
- * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This file is released under the GPLv2.
- * See the file COPYING for more details.
- */
-
-#include <linux/module.h>
-#include <linux/marker.h>
-#include <linux/sched.h>
-#include <linux/proc_fs.h>
-
-struct proc_dir_entry *pentry_example;
-
-static int my_open(struct inode *inode, struct file *file)
-{
-	int i;
-
-	trace_mark(subsystem_event, "integer %d string %s", 123,
-		"example string");
-	for (i = 0; i < 10; i++)
-		trace_mark(subsystem_eventb, MARK_NOARGS);
-	return -EPERM;
-}
-
-static struct file_operations mark_ops = {
-	.open = my_open,
-};
-
-static int __init example_init(void)
-{
-	printk(KERN_ALERT "example init\n");
-	pentry_example = proc_create("marker-example", 0444, NULL, &mark_ops);
-	if (!pentry_example)
-		return -EPERM;
-	return 0;
-}
-
-static void __exit example_exit(void)
-{
-	printk(KERN_ALERT "example exit\n");
-	remove_proc_entry("marker-example", NULL);
-}
-
-module_init(example_init)
-module_exit(example_exit)
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mathieu Desnoyers");
-MODULE_DESCRIPTION("Marker example");
diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c
deleted file mode 100644
index 2dfb3b3..0000000
--- a/samples/markers/probe-example.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/* probe-example.c
- *
- * Connects two functions to marker call sites.
- *
- * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This file is released under the GPLv2.
- * See the file COPYING for more details.
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/marker.h>
-#include <asm/atomic.h>
-
-struct probe_data {
-	const char *name;
-	const char *format;
-	marker_probe_func *probe_func;
-};
-
-void probe_subsystem_event(void *probe_data, void *call_data,
-	const char *format, va_list *args)
-{
-	/* Declare args */
-	unsigned int value;
-	const char *mystr;
-
-	/* Assign args */
-	value = va_arg(*args, typeof(value));
-	mystr = va_arg(*args, typeof(mystr));
-
-	/* Call printk */
-	printk(KERN_INFO "Value %u, string %s\n", value, mystr);
-
-	/* or count, check rights, serialize data in a buffer */
-}
-
-atomic_t eventb_count = ATOMIC_INIT(0);
-
-void probe_subsystem_eventb(void *probe_data, void *call_data,
-	const char *format, va_list *args)
-{
-	/* Increment counter */
-	atomic_inc(&eventb_count);
-}
-
-static struct probe_data probe_array[] =
-{
-	{	.name = "subsystem_event",
-		.format = "integer %d string %s",
-		.probe_func = probe_subsystem_event },
-	{	.name = "subsystem_eventb",
-		.format = MARK_NOARGS,
-		.probe_func = probe_subsystem_eventb },
-};
-
-static int __init probe_init(void)
-{
-	int result;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(probe_array); i++) {
-		result = marker_probe_register(probe_array[i].name,
-				probe_array[i].format,
-				probe_array[i].probe_func, &probe_array[i]);
-		if (result)
-			printk(KERN_INFO "Unable to register probe %s\n",
-				probe_array[i].name);
-	}
-	return 0;
-}
-
-static void __exit probe_fini(void)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(probe_array); i++)
-		marker_probe_unregister(probe_array[i].name,
-			probe_array[i].probe_func, &probe_array[i]);
-	printk(KERN_INFO "Number of event b : %u\n",
-			atomic_read(&eventb_count));
-	marker_synchronize_unregister();
-}
-
-module_init(probe_init);
-module_exit(probe_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mathieu Desnoyers");
-MODULE_DESCRIPTION("SUBSYSTEM Probe");
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index f4053dc..8f14c81 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -13,7 +13,6 @@
 # 2) modpost is then used to
 # 3)  create one <module>.mod.c file pr. module
 # 4)  create one Module.symvers file with CRC for all exported symbols
-# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
 # 5) compile all <module>.mod.c files
 # 6) final link of the module to a <module.ko> file
 
@@ -59,10 +58,6 @@ include scripts/Makefile.lib
 
 kernelsymfile := $(objtree)/Module.symvers
 modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers
-kernelmarkersfile := $(objtree)/Module.markers
-modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
-
-markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
 
 # Step 1), find all modules listed in $(MODVERDIR)/
 __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
@@ -85,8 +80,6 @@ modpost = scripts/mod/modpost                    \
  $(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \
  $(if $(KBUILD_EXTMOD),-o $(modulesymfile))      \
  $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S)      \
- $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
- $(if $(CONFIG_MARKERS),-M $(markersfile))	 \
  $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) \
  $(if $(cross_build),-c)
 
@@ -101,17 +94,12 @@ quiet_cmd_kernel-mod = MODPOST $@
       cmd_kernel-mod = $(modpost) $@
 
 vmlinux.o: FORCE
-	@rm -fr $(kernelmarkersfile)
 	$(call cmd,kernel-mod)
 
 # Declare generated files as targets for modpost
 $(symverfile):         __modpost ;
 $(modules:.ko=.mod.c): __modpost ;
 
-ifdef CONFIG_MARKERS
-$(markersfile):	       __modpost ;
-endif
-
 
 # Step 5), compile all *.mod.c files
 

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-08-25 18:02 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-08-25 18:02 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt,
	=?unknown-8bit?B?RnLDqWTDqXJpYw==?= Weisbecker

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Ingo Molnar (1):
      tracing: Fix too large stack usage in do_one_initcall()

Jiri Olsa (1):
      tracing: handle broken names in ftrace filter

Zhaolei (1):
      ftrace: Unify effect of writing to trace_options and option/*


 init/main.c           |    7 ++++---
 kernel/trace/ftrace.c |   17 +++++++++++------
 kernel/trace/trace.c  |   12 ++----------
 3 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/init/main.c b/init/main.c
index 2c5ade7..98e679e 100644
--- a/init/main.c
+++ b/init/main.c
@@ -733,13 +733,14 @@ static void __init do_ctors(void)
 int initcall_debug;
 core_param(initcall_debug, initcall_debug, bool, 0644);
 
+static char msgbuf[64];
+static struct boot_trace_call call;
+static struct boot_trace_ret ret;
+
 int do_one_initcall(initcall_t fn)
 {
 	int count = preempt_count();
 	ktime_t calltime, delta, rettime;
-	char msgbuf[64];
-	struct boot_trace_call call;
-	struct boot_trace_ret ret;
 
 	if (initcall_debug) {
 		call.caller = task_pid_nr(current);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1e1d23c..25edd5c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2278,7 +2278,11 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
 	read++;
 	cnt--;
 
-	if (!(iter->flags & ~FTRACE_ITER_CONT)) {
+	/*
+	 * If the parser haven't finished with the last write,
+	 * continue reading the user input without skipping spaces.
+	 */
+	if (!(iter->flags & FTRACE_ITER_CONT)) {
 		/* skip white space */
 		while (cnt && isspace(ch)) {
 			ret = get_user(ch, ubuf++);
@@ -2288,8 +2292,9 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
 			cnt--;
 		}
 
+		/* only spaces were written */
 		if (isspace(ch)) {
-			file->f_pos += read;
+			*ppos += read;
 			ret = read;
 			goto out;
 		}
@@ -2319,12 +2324,12 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
 		if (ret)
 			goto out;
 		iter->buffer_idx = 0;
-	} else
+	} else {
 		iter->flags |= FTRACE_ITER_CONT;
+		iter->buffer[iter->buffer_idx++] = ch;
+	}
 
-
-	file->f_pos += read;
-
+	*ppos += read;
 	ret = read;
  out:
 	mutex_unlock(&ftrace_regex_lock);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c22b40f..8c35839 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3896,17 +3896,9 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	if (ret < 0)
 		return ret;
 
-	switch (val) {
-	case 0:
-		trace_flags &= ~(1 << index);
-		break;
-	case 1:
-		trace_flags |= 1 << index;
-		break;
-
-	default:
+	if (val != 0 && val != 1)
 		return -EINVAL;
-	}
+	set_tracer_flags(1 << index, val);
 
 	*ppos += cnt;
 

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-08-09 16:08 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-08-09 16:08 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt, Frédéric Weisbecker,
	Peter Zijlstra, Thomas Gleixner

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

Note, these two commits:

 f413cdb: perf_counter: Fix/complete ftrace event records sampling
 3a65930: perf_counter, ftrace: Fix perf_counter integration

are not regression fixes as perfcounters are new in .31, but 
fixes/enhancements to perfcounters+tracepoints integration. I kept 
them at the end of the tree and can zap them if they are a problem.

 Thanks,

	Ingo

------------------>
Eric Dumazet (1):
      ring-buffer: Fix memleak in ring_buffer_free()

Frederic Weisbecker (1):
      perf_counter: Fix/complete ftrace event records sampling

Peter Zijlstra (1):
      perf_counter, ftrace: Fix perf_counter integration

Robert Richter (1):
      ring-buffer: Fix advance of reader in rb_buffer_peek()

Steven Rostedt (4):
      ring-buffer: fix check of try_to_discard result
      ring-buffer: do not disable ring buffer on oops_in_progress
      tracing: do not use functions starting with .L in recordmcount.pl
      tracing: Fix recordmcount.pl to handle sections with only weak functions

Tom Zanussi (2):
      tracing/filters: Don't use pred on alloc failure
      tracing/filters: Always free pred on filter_add_subsystem_pred() failure


 include/linux/ftrace_event.h       |    4 +-
 include/linux/perf_counter.h       |    9 ++-
 include/trace/ftrace.h             |  172 ++++++++++++++++++++++++++++++-----
 kernel/perf_counter.c              |   22 ++++-
 kernel/trace/ring_buffer.c         |   15 ++--
 kernel/trace/trace.c               |    1 +
 kernel/trace/trace.h               |    4 -
 kernel/trace/trace_events_filter.c |   20 +++-
 scripts/recordmcount.pl            |    9 ++-
 tools/perf/builtin-record.c        |    1 +
 10 files changed, 207 insertions(+), 50 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index d7cd193..a81170d 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -89,7 +89,9 @@ enum print_line_t {
 	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
 };
 
-
+void tracing_generic_entry_update(struct trace_entry *entry,
+				  unsigned long flags,
+				  int pc);
 struct ring_buffer_event *
 trace_current_buffer_lock_reserve(int type, unsigned long len,
 				  unsigned long flags, int pc);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e604e6e..a67dd5c 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -121,8 +121,9 @@ enum perf_counter_sample_format {
 	PERF_SAMPLE_CPU				= 1U << 7,
 	PERF_SAMPLE_PERIOD			= 1U << 8,
 	PERF_SAMPLE_STREAM_ID			= 1U << 9,
+	PERF_SAMPLE_TP_RECORD			= 1U << 10,
 
-	PERF_SAMPLE_MAX = 1U << 10,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
 };
 
 /*
@@ -413,6 +414,11 @@ struct perf_callchain_entry {
 	__u64				ip[PERF_MAX_STACK_DEPTH];
 };
 
+struct perf_tracepoint_record {
+	int				size;
+	char				*record;
+};
+
 struct task_struct;
 
 /**
@@ -681,6 +687,7 @@ struct perf_sample_data {
 	struct pt_regs			*regs;
 	u64				addr;
 	u64				period;
+	void				*private;
 };
 
 extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 1867553..7fb16d9 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -144,6 +144,9 @@
 #undef TP_fast_assign
 #define TP_fast_assign(args...) args
 
+#undef TP_perf_assign
+#define TP_perf_assign(args...)
+
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
 static int								\
@@ -345,6 +348,56 @@ static inline int ftrace_get_offsets_##call(				\
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
+#ifdef CONFIG_EVENT_PROFILE
+
+/*
+ * Generate the functions needed for tracepoint perf_counter support.
+ *
+ * NOTE: The insertion profile callback (ftrace_profile_<call>) is defined later
+ *
+ * static int ftrace_profile_enable_<call>(struct ftrace_event_call *event_call)
+ * {
+ * 	int ret = 0;
+ *
+ * 	if (!atomic_inc_return(&event_call->profile_count))
+ * 		ret = register_trace_<call>(ftrace_profile_<call>);
+ *
+ * 	return ret;
+ * }
+ *
+ * static void ftrace_profile_disable_<call>(struct ftrace_event_call *event_call)
+ * {
+ * 	if (atomic_add_negative(-1, &event->call->profile_count))
+ * 		unregister_trace_<call>(ftrace_profile_<call>);
+ * }
+ *
+ */
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+									\
+static void ftrace_profile_##call(proto);				\
+									\
+static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
+{									\
+	int ret = 0;							\
+									\
+	if (!atomic_inc_return(&event_call->profile_count))		\
+		ret = register_trace_##call(ftrace_profile_##call);	\
+									\
+	return ret;							\
+}									\
+									\
+static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
+{									\
+	if (atomic_add_negative(-1, &event_call->profile_count))	\
+		unregister_trace_##call(ftrace_profile_##call);		\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#endif
+
 /*
  * Stage 4 of the trace events.
  *
@@ -447,28 +500,6 @@ static inline int ftrace_get_offsets_##call(				\
 #define TP_FMT(fmt, args...)	fmt "\n", ##args
 
 #ifdef CONFIG_EVENT_PROFILE
-#define _TRACE_PROFILE(call, proto, args)				\
-static void ftrace_profile_##call(proto)				\
-{									\
-	extern void perf_tpcounter_event(int);				\
-	perf_tpcounter_event(event_##call.id);				\
-}									\
-									\
-static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
-{									\
-	int ret = 0;							\
-									\
-	if (!atomic_inc_return(&event_call->profile_count))		\
-		ret = register_trace_##call(ftrace_profile_##call);	\
-									\
-	return ret;							\
-}									\
-									\
-static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
-{									\
-	if (atomic_add_negative(-1, &event_call->profile_count))	\
-		unregister_trace_##call(ftrace_profile_##call);		\
-}
 
 #define _TRACE_PROFILE_INIT(call)					\
 	.profile_count = ATOMIC_INIT(-1),				\
@@ -476,7 +507,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
 	.profile_disable = ftrace_profile_disable_##call,
 
 #else
-#define _TRACE_PROFILE(call, proto, args)
 #define _TRACE_PROFILE_INIT(call)
 #endif
 
@@ -502,7 +532,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
 
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
 									\
 static struct ftrace_event_call event_##call;				\
 									\
@@ -586,6 +615,99 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-#undef _TRACE_PROFILE
+/*
+ * Define the insertion callback to profile events
+ *
+ * The job is very similar to ftrace_raw_event_<call> except that we don't
+ * insert in the ring buffer but in a perf counter.
+ *
+ * static void ftrace_profile_<call>(proto)
+ * {
+ *	struct ftrace_data_offsets_<call> __maybe_unused __data_offsets;
+ *	struct ftrace_event_call *event_call = &event_<call>;
+ *	extern void perf_tpcounter_event(int, u64, u64, void *, int);
+ *	struct ftrace_raw_##call *entry;
+ *	u64 __addr = 0, __count = 1;
+ *	unsigned long irq_flags;
+ *	int __entry_size;
+ *	int __data_size;
+ *	int pc;
+ *
+ *	local_save_flags(irq_flags);
+ *	pc = preempt_count();
+ *
+ *	__data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
+ *	__entry_size = __data_size + sizeof(*entry);
+ *
+ *	do {
+ *		char raw_data[__entry_size]; <- allocate our sample in the stack
+ *		struct trace_entry *ent;
+ *
+ *		entry = (struct ftrace_raw_<call> *)raw_data;
+ *		ent = &entry->ent;
+ *		tracing_generic_entry_update(ent, irq_flags, pc);
+ *		ent->type = event_call->id;
+ *
+ *		<tstruct> <- do some jobs with dynamic arrays
+ *
+ *		<assign>  <- affect our values
+ *
+ *		perf_tpcounter_event(event_call->id, __addr, __count, entry,
+ *			     __entry_size);  <- submit them to perf counter
+ *	} while (0);
+ *
+ * }
+ */
+
+#ifdef CONFIG_EVENT_PROFILE
+
+#undef __perf_addr
+#define __perf_addr(a) __addr = (a)
+
+#undef __perf_count
+#define __perf_count(c) __count = (c)
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+static void ftrace_profile_##call(proto)				\
+{									\
+	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
+	struct ftrace_event_call *event_call = &event_##call;		\
+	extern void perf_tpcounter_event(int, u64, u64, void *, int);	\
+	struct ftrace_raw_##call *entry;				\
+	u64 __addr = 0, __count = 1;					\
+	unsigned long irq_flags;					\
+	int __entry_size;						\
+	int __data_size;						\
+	int pc;								\
+									\
+	local_save_flags(irq_flags);					\
+	pc = preempt_count();						\
+									\
+	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
+	__entry_size = ALIGN(__data_size + sizeof(*entry), sizeof(u64));\
+									\
+	do {								\
+		char raw_data[__entry_size];				\
+		struct trace_entry *ent;				\
+									\
+		entry = (struct ftrace_raw_##call *)raw_data;		\
+		ent = &entry->ent;					\
+		tracing_generic_entry_update(ent, irq_flags, pc);	\
+		ent->type = event_call->id;				\
+									\
+		tstruct							\
+									\
+		{ assign; }						\
+									\
+		perf_tpcounter_event(event_call->id, __addr, __count, entry,\
+			     __entry_size);				\
+	} while (0);							\
+									\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+#endif /* CONFIG_EVENT_PROFILE */
+
 #undef _TRACE_PROFILE_INIT
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 673c1aa..8681021 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2646,6 +2646,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 		u64 counter;
 	} group_entry;
 	struct perf_callchain_entry *callchain = NULL;
+	struct perf_tracepoint_record *tp;
 	int callchain_size = 0;
 	u64 time;
 	struct {
@@ -2714,6 +2715,11 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 			header.size += sizeof(u64);
 	}
 
+	if (sample_type & PERF_SAMPLE_TP_RECORD) {
+		tp = data->private;
+		header.size += tp->size;
+	}
+
 	ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
 	if (ret)
 		return;
@@ -2777,6 +2783,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 		}
 	}
 
+	if (sample_type & PERF_SAMPLE_TP_RECORD)
+		perf_output_copy(&handle, tp->record, tp->size);
+
 	perf_output_end(&handle);
 }
 
@@ -3703,17 +3712,24 @@ static const struct pmu perf_ops_task_clock = {
 };
 
 #ifdef CONFIG_EVENT_PROFILE
-void perf_tpcounter_event(int event_id)
+void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
+			  int entry_size)
 {
+	struct perf_tracepoint_record tp = {
+		.size = entry_size,
+		.record = record,
+	};
+
 	struct perf_sample_data data = {
 		.regs = get_irq_regs(),
-		.addr = 0,
+		.addr = addr,
+		.private = &tp,
 	};
 
 	if (!data.regs)
 		data.regs = task_pt_regs(current);
 
-	do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data);
+	do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data);
 }
 EXPORT_SYMBOL_GPL(perf_tpcounter_event);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bf27bb7..a330513 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -735,6 +735,7 @@ ring_buffer_free(struct ring_buffer *buffer)
 
 	put_online_cpus();
 
+	kfree(buffer->buffers);
 	free_cpumask_var(buffer->cpumask);
 
 	kfree(buffer);
@@ -1785,7 +1786,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 	 */
 	RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
 
-	if (!rb_try_to_discard(cpu_buffer, event))
+	if (rb_try_to_discard(cpu_buffer, event))
 		goto out;
 
 	/*
@@ -2383,7 +2384,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 		 * the box. Return the padding, and we will release
 		 * the current locks, and try again.
 		 */
-		rb_advance_reader(cpu_buffer);
 		return event;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
@@ -2486,7 +2486,7 @@ static inline int rb_ok_to_lock(void)
 	 * buffer too. A one time deal is all you get from reading
 	 * the ring buffer from an NMI.
 	 */
-	if (likely(!in_nmi() && !oops_in_progress))
+	if (likely(!in_nmi()))
 		return 1;
 
 	tracing_off_permanent();
@@ -2519,6 +2519,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	if (dolock)
 		spin_lock(&cpu_buffer->reader_lock);
 	event = rb_buffer_peek(buffer, cpu, ts);
+	if (event && event->type_len == RINGBUF_TYPE_PADDING)
+		rb_advance_reader(cpu_buffer);
 	if (dolock)
 		spin_unlock(&cpu_buffer->reader_lock);
 	local_irq_restore(flags);
@@ -2590,12 +2592,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 		spin_lock(&cpu_buffer->reader_lock);
 
 	event = rb_buffer_peek(buffer, cpu, ts);
-	if (!event)
-		goto out_unlock;
-
-	rb_advance_reader(cpu_buffer);
+	if (event)
+		rb_advance_reader(cpu_buffer);
 
- out_unlock:
 	if (dolock)
 		spin_unlock(&cpu_buffer->reader_lock);
 	local_irq_restore(flags);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8930e39..c22b40f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -848,6 +848,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
 		(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
 }
+EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
 
 struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
 						    int type,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3548ae5..8b9f4f6 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -438,10 +438,6 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
 					  int *ent_cpu, u64 *ent_ts);
 
-void tracing_generic_entry_update(struct trace_entry *entry,
-				  unsigned long flags,
-				  int pc);
-
 void default_wait_pipe(struct trace_iterator *iter);
 void poll_wait_pipe(struct trace_iterator *iter);
 
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 936c621..f32dc9d 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -624,9 +624,6 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
 		return -ENOSPC;
 	}
 
-	filter->preds[filter->n_preds] = pred;
-	filter->n_preds++;
-
 	list_for_each_entry(call, &ftrace_events, list) {
 
 		if (!call->define_fields)
@@ -643,6 +640,9 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
 		}
 		replace_filter_string(call->filter, filter_string);
 	}
+
+	filter->preds[filter->n_preds] = pred;
+	filter->n_preds++;
 out:
 	return err;
 }
@@ -1029,12 +1029,17 @@ static int replace_preds(struct event_subsystem *system,
 
 		if (elt->op == OP_AND || elt->op == OP_OR) {
 			pred = create_logical_pred(elt->op);
+			if (!pred)
+				return -ENOMEM;
 			if (call) {
 				err = filter_add_pred(ps, call, pred);
 				filter_free_pred(pred);
-			} else
+			} else {
 				err = filter_add_subsystem_pred(ps, system,
 							pred, filter_string);
+				if (err)
+					filter_free_pred(pred);
+			}
 			if (err)
 				return err;
 
@@ -1048,12 +1053,17 @@ static int replace_preds(struct event_subsystem *system,
 		}
 
 		pred = create_pred(elt->op, operand1, operand2);
+		if (!pred)
+			return -ENOMEM;
 		if (call) {
 			err = filter_add_pred(ps, call, pred);
 			filter_free_pred(pred);
-		} else
+		} else {
 			err = filter_add_subsystem_pred(ps, system, pred,
 							filter_string);
+			if (err)
+				filter_free_pred(pred);
+		}
 		if (err)
 			return err;
 
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index d29baa2..911ba7f 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -393,7 +393,7 @@ while (<IN>) {
 	    $read_function = 0;
 	}
 	# print out any recorded offsets
-	update_funcs() if ($text_found);
+	update_funcs() if (defined($ref_func));
 
 	# reset all markers and arrays
 	$text_found = 0;
@@ -414,7 +414,10 @@ while (<IN>) {
 	    $offset = hex $1;
 	} else {
 	    # if we already have a function, and this is weak, skip it
-	    if (!defined($ref_func) && !defined($weak{$text})) {
+	    if (!defined($ref_func) && !defined($weak{$text}) &&
+		 # PPC64 can have symbols that start with .L and
+		 # gcc considers these special. Don't use them!
+		 $text !~ /^\.L/) {
 		$ref_func = $text;
 		$offset = hex $1;
 	    }
@@ -441,7 +444,7 @@ while (<IN>) {
 }
 
 # dump out anymore offsets that may have been found
-update_funcs() if ($text_found);
+update_funcs() if (defined($ref_func));
 
 # If we did not find any mcount callers, we are done (do nothing).
 if (!$opened) {
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6da0992..90c9808 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -412,6 +412,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
 	if (call_graph)
 		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;
 
+
 	attr->mmap		= track;
 	attr->comm		= track;
 	attr->inherit		= (cpu < 0) && inherit;

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-08-04 19:01 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-08-04 19:01 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt, Thomas Gleixner,
	Frédéric Weisbecker

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Lai Jiangshan (2):
      tracing: Fix invalid function_graph entry
      tracing: Fix missing function_graph events when we splice_read from trace_pipe

Li Zefan (3):
      trace_stack: Fix seqfile memory leak
      function-graph: Fix seqfile memory leak
      tracing/stat: Fix seqfile memory leak

Matt Fleming (2):
      ftrace: Fix the conditional that updates $ref_func
      ftrace: Only update $offset when we update $ref_func

Mel Gorman (1):
      profile: Suppress warning about large allocations when profile=1 is specified

Steven Rostedt (2):
      tracing: show proper address for trace-printk format
      tracing: only truncate ftrace files when O_TRUNC is set

Thomas Gleixner (1):
      trace: stop tracer in oops_enter()


 kernel/panic.c                       |    1 +
 kernel/profile.c                     |    5 +++--
 kernel/trace/ftrace.c                |   19 ++++++++++++++-----
 kernel/trace/trace.c                 |   12 ++++++++----
 kernel/trace/trace_events.c          |    2 +-
 kernel/trace/trace_functions_graph.c |   11 +++++++++--
 kernel/trace/trace_printk.c          |    2 +-
 kernel/trace/trace_stack.c           |    7 ++-----
 kernel/trace/trace_stat.c            |   34 ++++++++++++++++++++++------------
 scripts/recordmcount.pl              |    5 +++--
 10 files changed, 64 insertions(+), 34 deletions(-)

diff --git a/kernel/panic.c b/kernel/panic.c
index 984b3ec..512ab73 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -301,6 +301,7 @@ int oops_may_print(void)
  */
 void oops_enter(void)
 {
+	tracing_off();
 	/* can't trust the integrity of the kernel anymore: */
 	debug_locks_off();
 	do_oops_enter_exit();
diff --git a/kernel/profile.c b/kernel/profile.c
index 69911b5..419250e 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -117,11 +117,12 @@ int __ref profile_init(void)
 
 	cpumask_copy(prof_cpu_mask, cpu_possible_mask);
 
-	prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL);
+	prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL|__GFP_NOWARN);
 	if (prof_buffer)
 		return 0;
 
-	prof_buffer = alloc_pages_exact(buffer_bytes, GFP_KERNEL|__GFP_ZERO);
+	prof_buffer = alloc_pages_exact(buffer_bytes,
+					GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN);
 	if (prof_buffer)
 		return 0;
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4521c77..1e1d23c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1662,7 +1662,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
 
 	mutex_lock(&ftrace_regex_lock);
 	if ((file->f_mode & FMODE_WRITE) &&
-	    !(file->f_flags & O_APPEND))
+	    (file->f_flags & O_TRUNC))
 		ftrace_filter_reset(enable);
 
 	if (file->f_mode & FMODE_READ) {
@@ -2577,7 +2577,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
 
 	mutex_lock(&graph_lock);
 	if ((file->f_mode & FMODE_WRITE) &&
-	    !(file->f_flags & O_APPEND)) {
+	    (file->f_flags & O_TRUNC)) {
 		ftrace_graph_count = 0;
 		memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
 	}
@@ -2596,6 +2596,14 @@ ftrace_graph_open(struct inode *inode, struct file *file)
 }
 
 static int
+ftrace_graph_release(struct inode *inode, struct file *file)
+{
+	if (file->f_mode & FMODE_READ)
+		seq_release(inode, file);
+	return 0;
+}
+
+static int
 ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 {
 	struct dyn_ftrace *rec;
@@ -2724,9 +2732,10 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
 }
 
 static const struct file_operations ftrace_graph_fops = {
-	.open = ftrace_graph_open,
-	.read = seq_read,
-	.write = ftrace_graph_write,
+	.open		= ftrace_graph_open,
+	.read		= seq_read,
+	.write		= ftrace_graph_write,
+	.release	= ftrace_graph_release,
 };
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8bc8d8a..8930e39 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2031,7 +2031,7 @@ static int tracing_open(struct inode *inode, struct file *file)
 
 	/* If this file was open for write, then erase contents */
 	if ((file->f_mode & FMODE_WRITE) &&
-	    !(file->f_flags & O_APPEND)) {
+	    (file->f_flags & O_TRUNC)) {
 		long cpu = (long) inode->i_private;
 
 		if (cpu == TRACE_PIPE_ALL_CPU)
@@ -3085,7 +3085,8 @@ tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
 			break;
 		}
 
-		trace_consume(iter);
+		if (ret != TRACE_TYPE_NO_CONSUME)
+			trace_consume(iter);
 		rem -= count;
 		if (!find_next_entry_inc(iter))	{
 			rem = 0;
@@ -4233,8 +4234,11 @@ static void __ftrace_dump(bool disable_tracing)
 		iter.pos = -1;
 
 		if (find_next_entry_inc(&iter) != NULL) {
-			print_trace_line(&iter);
-			trace_consume(&iter);
+			int ret;
+
+			ret = print_trace_line(&iter);
+			if (ret != TRACE_TYPE_NO_CONSUME)
+				trace_consume(&iter);
 		}
 
 		trace_printk_seq(&iter.seq);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 53c8fd3..23d2972 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -376,7 +376,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file)
 	const struct seq_operations *seq_ops;
 
 	if ((file->f_mode & FMODE_WRITE) &&
-	    !(file->f_flags & O_APPEND))
+	    (file->f_flags & O_TRUNC))
 		ftrace_clear_events();
 
 	seq_ops = inode->i_private;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index d2249ab..420ec34 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -843,9 +843,16 @@ print_graph_function(struct trace_iterator *iter)
 
 	switch (entry->type) {
 	case TRACE_GRAPH_ENT: {
-		struct ftrace_graph_ent_entry *field;
+		/*
+		 * print_graph_entry() may consume the current event,
+		 * thus @field may become invalid, so we need to save it.
+		 * sizeof(struct ftrace_graph_ent_entry) is very small,
+		 * it can be safely saved at the stack.
+		 */
+		struct ftrace_graph_ent_entry *field, saved;
 		trace_assign_type(field, entry);
-		return print_graph_entry(field, s, iter);
+		saved = *field;
+		return print_graph_entry(&saved, s, iter);
 	}
 	case TRACE_GRAPH_RET: {
 		struct ftrace_graph_ret_entry *field;
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 7b62781..687699d 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -176,7 +176,7 @@ static int t_show(struct seq_file *m, void *v)
 	const char *str = *fmt;
 	int i;
 
-	seq_printf(m, "0x%lx : \"", (unsigned long)fmt);
+	seq_printf(m, "0x%lx : \"", *(unsigned long *)fmt);
 
 	/*
 	 * Tabs and new lines need to be converted.
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index e644af9..6a2a9d4 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -301,17 +301,14 @@ static const struct seq_operations stack_trace_seq_ops = {
 
 static int stack_trace_open(struct inode *inode, struct file *file)
 {
-	int ret;
-
-	ret = seq_open(file, &stack_trace_seq_ops);
-
-	return ret;
+	return seq_open(file, &stack_trace_seq_ops);
 }
 
 static const struct file_operations stack_trace_fops = {
 	.open		= stack_trace_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
+	.release	= seq_release,
 };
 
 int
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index e66f5e4..aea321c 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -73,7 +73,7 @@ static struct rb_node *release_next(struct rb_node *node)
 	}
 }
 
-static void reset_stat_session(struct stat_session *session)
+static void __reset_stat_session(struct stat_session *session)
 {
 	struct rb_node *node = session->stat_root.rb_node;
 
@@ -83,10 +83,17 @@ static void reset_stat_session(struct stat_session *session)
 	session->stat_root = RB_ROOT;
 }
 
+static void reset_stat_session(struct stat_session *session)
+{
+	mutex_lock(&session->stat_mutex);
+	__reset_stat_session(session);
+	mutex_unlock(&session->stat_mutex);
+}
+
 static void destroy_session(struct stat_session *session)
 {
 	debugfs_remove(session->file);
-	reset_stat_session(session);
+	__reset_stat_session(session);
 	mutex_destroy(&session->stat_mutex);
 	kfree(session);
 }
@@ -150,7 +157,7 @@ static int stat_seq_init(struct stat_session *session)
 	int i;
 
 	mutex_lock(&session->stat_mutex);
-	reset_stat_session(session);
+	__reset_stat_session(session);
 
 	if (!ts->stat_cmp)
 		ts->stat_cmp = dummy_cmp;
@@ -183,7 +190,7 @@ exit:
 	return ret;
 
 exit_free_rbtree:
-	reset_stat_session(session);
+	__reset_stat_session(session);
 	mutex_unlock(&session->stat_mutex);
 	return ret;
 }
@@ -250,16 +257,21 @@ static const struct seq_operations trace_stat_seq_ops = {
 static int tracing_stat_open(struct inode *inode, struct file *file)
 {
 	int ret;
-
+	struct seq_file *m;
 	struct stat_session *session = inode->i_private;
 
+	ret = stat_seq_init(session);
+	if (ret)
+		return ret;
+
 	ret = seq_open(file, &trace_stat_seq_ops);
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-		m->private = session;
-		ret = stat_seq_init(session);
+	if (ret) {
+		reset_stat_session(session);
+		return ret;
 	}
 
+	m = file->private_data;
+	m->private = session;
 	return ret;
 }
 
@@ -270,11 +282,9 @@ static int tracing_stat_release(struct inode *i, struct file *f)
 {
 	struct stat_session *session = i->i_private;
 
-	mutex_lock(&session->stat_mutex);
 	reset_stat_session(session);
-	mutex_unlock(&session->stat_mutex);
 
-	return 0;
+	return seq_release(i, f);
 }
 
 static const struct file_operations tracing_stat_fops = {
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 7109e2b..d29baa2 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -403,7 +403,6 @@ while (<IN>) {
     # section found, now is this a start of a function?
     } elsif ($read_function && /$function_regex/) {
 	$text_found = 1;
-	$offset = hex $1;
 	$text = $2;
 
 	# if this is either a local function or a weak function
@@ -412,10 +411,12 @@ while (<IN>) {
 	if (!defined($locals{$text}) && !defined($weak{$text})) {
 	    $ref_func = $text;
 	    $read_function = 0;
+	    $offset = hex $1;
 	} else {
 	    # if we already have a function, and this is weak, skip it
-	    if (!defined($ref_func) || !defined($weak{$text})) {
+	    if (!defined($ref_func) && !defined($weak{$text})) {
 		$ref_func = $text;
+		$offset = hex $1;
 	    }
 	}
     } elsif ($read_headers && /$mcount_section/) {

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-07-10 16:25 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-07-10 16:25 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Steven Rostedt

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Lai Jiangshan (1):
      trace_export: Repair missed fields

Li Zefan (2):
      tracing: Fix stack tracer sysctl handling
      tracing/fastboot: Document the need of initcall_debug

Masami Hiramatsu (1):
      kprobes: No need to unlock kprobe_insn_mutex

Xiao Guangrong (1):
      tracing: Fix trace_print_seq()


 kernel/kprobes.c                 |    6 +-----
 kernel/trace/Kconfig             |    6 +++---
 kernel/trace/ftrace.c            |    4 ++--
 kernel/trace/trace_event_types.h |    3 +++
 kernel/trace/trace_output.c      |    3 +--
 kernel/trace/trace_stack.c       |    4 ++--
 6 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c0fa54b..16b5739 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -237,13 +237,9 @@ static int __kprobes collect_garbage_slots(void)
 {
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos, *next;
-	int safety;
 
 	/* Ensure no-one is preepmted on the garbages */
-	mutex_unlock(&kprobe_insn_mutex);
-	safety = check_safety();
-	mutex_lock(&kprobe_insn_mutex);
-	if (safety != 0)
+	if (check_safety())
 		return -EAGAIN;
 
 	hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1551f47..019f380 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -226,13 +226,13 @@ config BOOT_TRACER
 	  the timings of the initcalls and traces key events and the identity
 	  of tasks that can cause boot delays, such as context-switches.
 
-	  Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+	  Its aim is to be parsed by the scripts/bootgraph.pl tool to
 	  produce pretty graphics about boot inefficiencies, giving a visual
 	  representation of the delays during initcalls - but the raw
 	  /debug/tracing/trace text output is readable too.
 
-	  You must pass in ftrace=initcall to the kernel command line
-	  to enable this on bootup.
+	  You must pass in initcall_debug and ftrace=initcall to the kernel
+	  command line to enable this on bootup.
 
 config TRACE_BRANCH_PROFILING
 	bool
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f3716bf..bce9e01 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3160,10 +3160,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
 	ret  = proc_dointvec(table, write, file, buffer, lenp, ppos);
 
-	if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
+	if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
 		goto out;
 
-	last_ftrace_enabled = ftrace_enabled;
+	last_ftrace_enabled = !!ftrace_enabled;
 
 	if (ftrace_enabled) {
 
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index 5e32e37..6db005e 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -26,6 +26,9 @@ TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
 		   ftrace_graph_ret_entry, ignore,
 	TRACE_STRUCT(
 		TRACE_FIELD(unsigned long, ret.func, func)
+		TRACE_FIELD(unsigned long long, ret.calltime, calltime)
+		TRACE_FIELD(unsigned long long, ret.rettime, rettime)
+		TRACE_FIELD(unsigned long, ret.overrun, overrun)
 		TRACE_FIELD(int, ret.depth, depth)
 	),
 	TP_RAW_FMT("<-- %lx (%d)")
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 7938f3a..e0c2545 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -27,8 +27,7 @@ void trace_print_seq(struct seq_file *m, struct trace_seq *s)
 {
 	int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
 
-	s->buffer[len] = 0;
-	seq_puts(m, s->buffer);
+	seq_write(m, s->buffer, len);
 
 	trace_seq_init(s);
 }
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 2d7aebd..e644af9 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -326,10 +326,10 @@ stack_trace_sysctl(struct ctl_table *table, int write,
 	ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
 
 	if (ret || !write ||
-	    (last_stack_tracer_enabled == stack_tracer_enabled))
+	    (last_stack_tracer_enabled == !!stack_tracer_enabled))
 		goto out;
 
-	last_stack_tracer_enabled = stack_tracer_enabled;
+	last_stack_tracer_enabled = !!stack_tracer_enabled;
 
 	if (stack_tracer_enabled)
 		register_ftrace_function(&trace_ops);

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-06-26 18:56 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-06-26 18:56 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Steven Rostedt, Li Zefan

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Li Zefan (10):
      tracing/events: Don't increment @pos in s_start()
      tracing_bprintk: Don't increment @pos in t_start()
      trace_stat: Don't increment @pos in seq start()
      tracing: Reset iterator in t_start()
      ftrace: Don't increment @pos in g_start()
      ftrace: Don't manipulate @pos in t_start()
      ftrace: Fix t_hash_start()
      tracing: Fix trace_buf_size boot option
      ftrace: Remove duplicate newline
      ftrace: Fix the output of profile

Paul Mundt (1):
      ring-buffer: Make it generally available


 Documentation/kernel-parameters.txt |    3 +-
 kernel/Makefile                     |    1 +
 kernel/trace/ftrace.c               |   56 ++++++++++++++++++++---------------
 kernel/trace/ring_buffer.c          |   11 +++++++
 kernel/trace/trace.c                |   23 ++++----------
 kernel/trace/trace.h                |    7 ++++
 kernel/trace/trace_events.c         |   28 +++++++++++++----
 kernel/trace/trace_functions.c      |    3 +-
 kernel/trace/trace_printk.c         |   26 ++++------------
 kernel/trace/trace_stat.c           |    6 +---
 10 files changed, 89 insertions(+), 75 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 92e1ab8..d3f41db 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2475,7 +2475,8 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	tp720=		[HW,PS2]
 
-	trace_buf_size=nn[KMG] [ftrace] will set tracing buffer size.
+	trace_buf_size=nn[KMG]
+			[FTRACE] will set tracing buffer size.
 
 	trix=		[HW,OSS] MediaTrix AudioTrix Pro
 			Format:
diff --git a/kernel/Makefile b/kernel/Makefile
index 0a32cb2..0630e29 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -96,6 +96,7 @@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_X86_DS) += trace/
+obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_SLOW_WORK) += slow-work.o
 obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3718d55..f3716bf 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -291,7 +291,9 @@ function_stat_next(void *v, int idx)
 	pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);
 
  again:
-	rec++;
+	if (idx != 0)
+		rec++;
+
 	if ((void *)rec >= (void *)&pg->records[pg->index]) {
 		pg = pg->next;
 		if (!pg)
@@ -1417,10 +1419,20 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
 {
 	struct ftrace_iterator *iter = m->private;
 	void *p = NULL;
+	loff_t l;
+
+	if (!(iter->flags & FTRACE_ITER_HASH))
+		*pos = 0;
 
 	iter->flags |= FTRACE_ITER_HASH;
 
-	return t_hash_next(m, p, pos);
+	iter->hidx = 0;
+	for (l = 0; l <= *pos; ) {
+		p = t_hash_next(m, p, &l);
+		if (!p)
+			break;
+	}
+	return p;
 }
 
 static int t_hash_show(struct seq_file *m, void *v)
@@ -1467,8 +1479,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 			iter->pg = iter->pg->next;
 			iter->idx = 0;
 			goto retry;
-		} else {
-			iter->idx = -1;
 		}
 	} else {
 		rec = &iter->pg->records[iter->idx++];
@@ -1497,6 +1507,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 {
 	struct ftrace_iterator *iter = m->private;
 	void *p = NULL;
+	loff_t l;
 
 	mutex_lock(&ftrace_lock);
 	/*
@@ -1508,23 +1519,21 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 		if (*pos > 0)
 			return t_hash_start(m, pos);
 		iter->flags |= FTRACE_ITER_PRINTALL;
-		(*pos)++;
 		return iter;
 	}
 
 	if (iter->flags & FTRACE_ITER_HASH)
 		return t_hash_start(m, pos);
 
-	if (*pos > 0) {
-		if (iter->idx < 0)
-			return p;
-		(*pos)--;
-		iter->idx--;
+	iter->pg = ftrace_pages_start;
+	iter->idx = 0;
+	for (l = 0; l <= *pos; ) {
+		p = t_next(m, p, &l);
+		if (!p)
+			break;
 	}
 
-	p = t_next(m, p, pos);
-
-	if (!p)
+	if (!p && iter->flags & FTRACE_ITER_FILTER)
 		return t_hash_start(m, pos);
 
 	return p;
@@ -2500,32 +2509,31 @@ int ftrace_graph_count;
 unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
 
 static void *
-g_next(struct seq_file *m, void *v, loff_t *pos)
+__g_next(struct seq_file *m, loff_t *pos)
 {
 	unsigned long *array = m->private;
-	int index = *pos;
 
-	(*pos)++;
-
-	if (index >= ftrace_graph_count)
+	if (*pos >= ftrace_graph_count)
 		return NULL;
+	return &array[*pos];
+}
 
-	return &array[index];
+static void *
+g_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	(*pos)++;
+	return __g_next(m, pos);
 }
 
 static void *g_start(struct seq_file *m, loff_t *pos)
 {
-	void *p = NULL;
-
 	mutex_lock(&graph_lock);
 
 	/* Nothing, tell g_show to print all functions are enabled */
 	if (!ftrace_graph_count && !*pos)
 		return (void *)1;
 
-	p = g_next(m, p, pos);
-
-	return p;
+	return __g_next(m, pos);
 }
 
 static void g_stop(struct seq_file *m, void *p)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 04dac26..bf27bb7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1563,6 +1563,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	return NULL;
 }
 
+#ifdef CONFIG_TRACING
+
 #define TRACE_RECURSIVE_DEPTH 16
 
 static int trace_recursive_lock(void)
@@ -1593,6 +1595,13 @@ static void trace_recursive_unlock(void)
 	current->trace_recursion--;
 }
 
+#else
+
+#define trace_recursive_lock()		(0)
+#define trace_recursive_unlock()	do { } while (0)
+
+#endif
+
 static DEFINE_PER_CPU(int, rb_need_resched);
 
 /**
@@ -3104,6 +3113,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 }
 EXPORT_SYMBOL_GPL(ring_buffer_read_page);
 
+#ifdef CONFIG_TRACING
 static ssize_t
 rb_simple_read(struct file *filp, char __user *ubuf,
 	       size_t cnt, loff_t *ppos)
@@ -3171,6 +3181,7 @@ static __init int rb_init_debugfs(void)
 }
 
 fs_initcall(rb_init_debugfs);
+#endif
 
 #ifdef CONFIG_HOTPLUG_CPU
 static int rb_cpu_notify(struct notifier_block *self,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 076fa6f..3aa0a0d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -284,13 +284,12 @@ void trace_wake_up(void)
 static int __init set_buf_size(char *str)
 {
 	unsigned long buf_size;
-	int ret;
 
 	if (!str)
 		return 0;
-	ret = strict_strtoul(str, 0, &buf_size);
+	buf_size = memparse(str, &str);
 	/* nr_entries can not be zero */
-	if (ret < 0 || buf_size == 0)
+	if (buf_size == 0)
 		return 0;
 	trace_buf_size = buf_size;
 	return 1;
@@ -2053,25 +2052,23 @@ static int tracing_open(struct inode *inode, struct file *file)
 static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct tracer *t = m->private;
+	struct tracer *t = v;
 
 	(*pos)++;
 
 	if (t)
 		t = t->next;
 
-	m->private = t;
-
 	return t;
 }
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
-	struct tracer *t = m->private;
+	struct tracer *t;
 	loff_t l = 0;
 
 	mutex_lock(&trace_types_lock);
-	for (; t && l < *pos; t = t_next(m, t, &l))
+	for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
 		;
 
 	return t;
@@ -2107,18 +2104,10 @@ static struct seq_operations show_traces_seq_ops = {
 
 static int show_traces_open(struct inode *inode, struct file *file)
 {
-	int ret;
-
 	if (tracing_disabled)
 		return -ENODEV;
 
-	ret = seq_open(file, &show_traces_seq_ops);
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-		m->private = trace_types;
-	}
-
-	return ret;
+	return seq_open(file, &show_traces_seq_ops);
 }
 
 static ssize_t
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6e735d4..3548ae5 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -597,6 +597,7 @@ print_graph_function(struct trace_iterator *iter)
 
 extern struct pid *ftrace_pid_trace;
 
+#ifdef CONFIG_FUNCTION_TRACER
 static inline int ftrace_trace_task(struct task_struct *task)
 {
 	if (!ftrace_pid_trace)
@@ -604,6 +605,12 @@ static inline int ftrace_trace_task(struct task_struct *task)
 
 	return test_tsk_trace_trace(task);
 }
+#else
+static inline int ftrace_trace_task(struct task_struct *task)
+{
+	return 1;
+}
+#endif
 
 /*
  * trace_iterator_flags is an enumeration that defines bit
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index aa08be6..53c8fd3 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -300,10 +300,18 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
+	struct ftrace_event_call *call = NULL;
+	loff_t l;
+
 	mutex_lock(&event_mutex);
-	if (*pos == 0)
-		m->private = ftrace_events.next;
-	return t_next(m, NULL, pos);
+
+	m->private = ftrace_events.next;
+	for (l = 0; l <= *pos; ) {
+		call = t_next(m, NULL, &l);
+		if (!call)
+			break;
+	}
+	return call;
 }
 
 static void *
@@ -332,10 +340,18 @@ s_next(struct seq_file *m, void *v, loff_t *pos)
 
 static void *s_start(struct seq_file *m, loff_t *pos)
 {
+	struct ftrace_event_call *call = NULL;
+	loff_t l;
+
 	mutex_lock(&event_mutex);
-	if (*pos == 0)
-		m->private = ftrace_events.next;
-	return s_next(m, NULL, pos);
+
+	m->private = ftrace_events.next;
+	for (l = 0; l <= *pos; ) {
+		call = s_next(m, NULL, &l);
+		if (!call)
+			break;
+	}
+	return call;
 }
 
 static int t_show(struct seq_file *m, void *v)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 90f1347..7402144 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -302,8 +302,7 @@ ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
 	if (count == -1)
 		seq_printf(m, ":unlimited\n");
 	else
-		seq_printf(m, ":count=%ld", count);
-	seq_putc(m, '\n');
+		seq_printf(m, ":count=%ld\n", count);
 
 	return 0;
 }
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 9bece96..7b62781 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -155,25 +155,19 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
 EXPORT_SYMBOL_GPL(__ftrace_vprintk);
 
 static void *
-t_next(struct seq_file *m, void *v, loff_t *pos)
+t_start(struct seq_file *m, loff_t *pos)
 {
-	const char **fmt = m->private;
-	const char **next = fmt;
-
-	(*pos)++;
+	const char **fmt = __start___trace_bprintk_fmt + *pos;
 
 	if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt)
 		return NULL;
-
-	next = fmt;
-	m->private = ++next;
-
 	return fmt;
 }
 
-static void *t_start(struct seq_file *m, loff_t *pos)
+static void *t_next(struct seq_file *m, void * v, loff_t *pos)
 {
-	return t_next(m, NULL, pos);
+	(*pos)++;
+	return t_start(m, pos);
 }
 
 static int t_show(struct seq_file *m, void *v)
@@ -224,15 +218,7 @@ static const struct seq_operations show_format_seq_ops = {
 static int
 ftrace_formats_open(struct inode *inode, struct file *file)
 {
-	int ret;
-
-	ret = seq_open(file, &show_format_seq_ops);
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-
-		m->private = __start___trace_bprintk_fmt;
-	}
-	return ret;
+	return seq_open(file, &show_format_seq_ops);
 }
 
 static const struct file_operations ftrace_formats_fops = {
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index c006437..e66f5e4 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -199,17 +199,13 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos)
 	mutex_lock(&session->stat_mutex);
 
 	/* If we are in the beginning of the file, print the headers */
-	if (!*pos && session->ts->stat_headers) {
-		(*pos)++;
+	if (!*pos && session->ts->stat_headers)
 		return SEQ_START_TOKEN;
-	}
 
 	node = rb_first(&session->stat_root);
 	for (i = 0; node && i < *pos; i++)
 		node = rb_next(node);
 
-	(*pos)++;
-
 	return node;
 }
 

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-06-20 16:53 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-06-20 16:53 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Steven Rostedt, Andrew Morton, Thomas Gleixner

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Frederic Weisbecker (2):
      tracing/urgent: fix unbalanced ftrace_start_up
      tracing/urgent: warn in case of ftrace_start_up inbalance

Li Zefan (6):
      tracing: fix a typo in tracing_cpumask_write()
      tracing: replace a GFP_ATOMIC with GFP_KERNEL allocation
      tracing/filters: operand can be negative
      tracing/filters: strloc should be unsigned short
      tracing/filters: free filter_string in destroy_preds()
      tracing/filters: fix race between filter setting and module unload

Rusty Russell (1):
      cpumask: use new operators in kernel/trace

Steven Rostedt (14):
      ring-buffer: prevent adding write in discarded area
      ring-buffer: have benchmark test handle discarded events
      ring-buffer: remove unused variable
      ring-buffer: use commit counters for commit pointer accounting
      tracing: update sample event documentation
      ring-buffer: use BUF_PAGE_HDR_SIZE in calculating index
      ring-buffer: remove useless warn on check
      ring-buffer: remove useless compile check for buffer_page size
      ring-buffer: check for less than two in size allocation
      ring-buffer: add locks around rb_per_cpu_empty
      ring-buffer: do not grab locks in nmi
      ring-buffer: have benchmark test print to trace buffer
      function-graph: disable when both x86_32 and optimize for size are configured
      function-graph: add stack frame test

Wu Zhangjin (1):
      tracing: fix undeclared 'PAGE_SIZE' in include/linux/trace_seq.h


 arch/powerpc/kernel/ftrace.c               |    2 +-
 arch/s390/kernel/ftrace.c                  |    2 +-
 arch/x86/Kconfig                           |    1 +
 arch/x86/kernel/entry_32.S                 |    2 +
 arch/x86/kernel/entry_64.S                 |    2 +
 arch/x86/kernel/ftrace.c                   |    6 +-
 include/linux/ftrace.h                     |    4 +-
 include/linux/trace_seq.h                  |    2 +
 kernel/trace/Kconfig                       |    8 +
 kernel/trace/ftrace.c                      |    7 +
 kernel/trace/kmemtrace.c                   |    2 +-
 kernel/trace/ring_buffer.c                 |  309 +++++++++++++++++-----------
 kernel/trace/ring_buffer_benchmark.c       |   45 +++--
 kernel/trace/trace.c                       |    8 +-
 kernel/trace/trace_events_filter.c         |   37 ++--
 kernel/trace/trace_functions.c             |    8 +-
 kernel/trace/trace_functions_graph.c       |   36 +++-
 samples/trace_events/Makefile              |    8 +
 samples/trace_events/trace-events-sample.h |   27 ++-
 19 files changed, 331 insertions(+), 185 deletions(-)

diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 2d182f1..58d6a61 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -605,7 +605,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) {
+	if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) {
 		*parent = old;
 		return;
 	}
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 82ddfd3..3e298e6 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -190,7 +190,7 @@ unsigned long prepare_ftrace_return(unsigned long ip, unsigned long parent)
 		goto out;
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		goto out;
-	if (ftrace_push_return_trace(parent, ip, &trace.depth) == -EBUSY)
+	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
 		goto out;
 	trace.func = ftrace_mcount_call_adjust(ip) & PSW_ADDR_INSN;
 	/* Only trace if the calling function expects to. */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 356d2ec..fcf12af 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -33,6 +33,7 @@ config X86
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FUNCTION_GRAPH_FP_TEST
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
 	select HAVE_FTRACE_SYSCALLS
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c929add..0d4b285 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1154,6 +1154,7 @@ ENTRY(ftrace_graph_caller)
 	pushl %edx
 	movl 0xc(%esp), %edx
 	lea 0x4(%ebp), %eax
+	movl (%ebp), %ecx
 	subl $MCOUNT_INSN_SIZE, %edx
 	call prepare_ftrace_return
 	popl %edx
@@ -1168,6 +1169,7 @@ return_to_handler:
 	pushl %eax
 	pushl %ecx
 	pushl %edx
+	movl %ebp, %eax
 	call ftrace_return_to_handler
 	movl %eax, 0xc(%esp)
 	popl %edx
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index de74f0a..c251be7 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -135,6 +135,7 @@ ENTRY(ftrace_graph_caller)
 
 	leaq 8(%rbp), %rdi
 	movq 0x38(%rsp), %rsi
+	movq (%rbp), %rdx
 	subq $MCOUNT_INSN_SIZE, %rsi
 
 	call	prepare_ftrace_return
@@ -150,6 +151,7 @@ GLOBAL(return_to_handler)
 	/* Save the return values */
 	movq %rax, (%rsp)
 	movq %rdx, 8(%rsp)
+	movq %rbp, %rdi
 
 	call ftrace_return_to_handler
 
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index b79c553..d94e1ea 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -408,7 +408,8 @@ int ftrace_disable_ftrace_graph_caller(void)
  * Hook the return address and push it in the stack of return addrs
  * in current thread info.
  */
-void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+			   unsigned long frame_pointer)
 {
 	unsigned long old;
 	int faulted;
@@ -453,7 +454,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) {
+	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
+		    frame_pointer) == -EBUSY) {
 		*parent = old;
 		return;
 	}
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 39b95c5..dc3b132 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -362,6 +362,7 @@ struct ftrace_ret_stack {
 	unsigned long func;
 	unsigned long long calltime;
 	unsigned long long subtime;
+	unsigned long fp;
 };
 
 /*
@@ -372,7 +373,8 @@ struct ftrace_ret_stack {
 extern void return_to_handler(void);
 
 extern int
-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth);
+ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
+			 unsigned long frame_pointer);
 
 /*
  * Sometimes we don't want to trace a function with the function
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index c68bccb..c134dd1 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -3,6 +3,8 @@
 
 #include <linux/fs.h>
 
+#include <asm/page.h>
+
 /*
  * Trace sequences are used to allow a function to call several other functions
  * to create a string of data to use (up to a max of PAGE_SIZE.
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 4a13e5a..b17ed87 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -18,6 +18,13 @@ config HAVE_FUNCTION_TRACER
 config HAVE_FUNCTION_GRAPH_TRACER
 	bool
 
+config HAVE_FUNCTION_GRAPH_FP_TEST
+	bool
+	help
+	 An arch may pass in a unique value (frame pointer) to both the
+	 entering and exiting of a function. On exit, the value is compared
+	 and if it does not match, then it will panic the kernel.
+
 config HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	bool
 	help
@@ -121,6 +128,7 @@ config FUNCTION_GRAPH_TRACER
 	bool "Kernel Function Graph Tracer"
 	depends on HAVE_FUNCTION_GRAPH_TRACER
 	depends on FUNCTION_TRACER
+	depends on !X86_32 || !CC_OPTIMIZE_FOR_SIZE
 	default y
 	help
 	  Enable the kernel to trace a function at both its return
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index bb60732..3718d55 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1224,6 +1224,13 @@ static void ftrace_shutdown(int command)
 		return;
 
 	ftrace_start_up--;
+	/*
+	 * Just warn in case of unbalance, no need to kill ftrace, it's not
+	 * critical but the ftrace_call callers may be never nopped again after
+	 * further ftrace uses.
+	 */
+	WARN_ON_ONCE(ftrace_start_up < 0);
+
 	if (!ftrace_start_up)
 		command |= FTRACE_DISABLE_CALLS;
 
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 86cdf67..1edaa95 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -186,7 +186,7 @@ static int kmem_trace_init(struct trace_array *tr)
 	int cpu;
 	kmemtrace_array = tr;
 
-	for_each_cpu_mask(cpu, cpu_possible_map)
+	for_each_cpu(cpu, cpu_possible_mask)
 		tracing_reset(tr, cpu);
 
 	kmemtrace_start_probes();
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2e642b2..589b3ee 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -205,6 +205,7 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
 #define RB_ALIGNMENT		4U
 #define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
+#define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */
 
 /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
 #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -414,6 +415,8 @@ struct ring_buffer_per_cpu {
 	unsigned long			overrun;
 	unsigned long			read;
 	local_t				entries;
+	local_t				committing;
+	local_t				commits;
 	u64				write_stamp;
 	u64				read_stamp;
 	atomic_t			record_disabled;
@@ -617,12 +620,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 	kfree(cpu_buffer);
 }
 
-/*
- * Causes compile errors if the struct buffer_page gets bigger
- * than the struct page.
- */
-extern int ring_buffer_page_too_big(void);
-
 #ifdef CONFIG_HOTPLUG_CPU
 static int rb_cpu_notify(struct notifier_block *self,
 			 unsigned long action, void *hcpu);
@@ -645,11 +642,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	int bsize;
 	int cpu;
 
-	/* Paranoid! Optimizes out when all is well */
-	if (sizeof(struct buffer_page) > sizeof(struct page))
-		ring_buffer_page_too_big();
-
-
 	/* keep it in its own cache line */
 	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
 			 GFP_KERNEL);
@@ -665,8 +657,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	buffer->reader_lock_key = key;
 
 	/* need at least two pages */
-	if (buffer->pages == 1)
-		buffer->pages++;
+	if (buffer->pages < 2)
+		buffer->pages = 2;
 
 	/*
 	 * In case of non-hotplug cpu, if the ring-buffer is allocated
@@ -1010,12 +1002,12 @@ rb_event_index(struct ring_buffer_event *event)
 {
 	unsigned long addr = (unsigned long)event;
 
-	return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
+	return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
 }
 
 static inline int
-rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
-	     struct ring_buffer_event *event)
+rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+		   struct ring_buffer_event *event)
 {
 	unsigned long addr = (unsigned long)event;
 	unsigned long index;
@@ -1028,31 +1020,6 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
 }
 
 static void
-rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
-		    struct ring_buffer_event *event)
-{
-	unsigned long addr = (unsigned long)event;
-	unsigned long index;
-
-	index = rb_event_index(event);
-	addr &= PAGE_MASK;
-
-	while (cpu_buffer->commit_page->page != (void *)addr) {
-		if (RB_WARN_ON(cpu_buffer,
-			  cpu_buffer->commit_page == cpu_buffer->tail_page))
-			return;
-		cpu_buffer->commit_page->page->commit =
-			cpu_buffer->commit_page->write;
-		rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
-		cpu_buffer->write_stamp =
-			cpu_buffer->commit_page->page->time_stamp;
-	}
-
-	/* Now set the commit to the event's index */
-	local_set(&cpu_buffer->commit_page->page->commit, index);
-}
-
-static void
 rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	/*
@@ -1170,6 +1137,59 @@ static unsigned rb_calculate_event_length(unsigned length)
 	return length;
 }
 
+static inline void
+rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
+	      struct buffer_page *tail_page,
+	      unsigned long tail, unsigned long length)
+{
+	struct ring_buffer_event *event;
+
+	/*
+	 * Only the event that crossed the page boundary
+	 * must fill the old tail_page with padding.
+	 */
+	if (tail >= BUF_PAGE_SIZE) {
+		local_sub(length, &tail_page->write);
+		return;
+	}
+
+	event = __rb_page_index(tail_page, tail);
+
+	/*
+	 * If this event is bigger than the minimum size, then
+	 * we need to be careful that we don't subtract the
+	 * write counter enough to allow another writer to slip
+	 * in on this page.
+	 * We put in a discarded commit instead, to make sure
+	 * that this space is not used again.
+	 *
+	 * If we are less than the minimum size, we don't need to
+	 * worry about it.
+	 */
+	if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
+		/* No room for any events */
+
+		/* Mark the rest of the page with padding */
+		rb_event_set_padding(event);
+
+		/* Set the write back to the previous setting */
+		local_sub(length, &tail_page->write);
+		return;
+	}
+
+	/* Put in a discarded event */
+	event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
+	event->type_len = RINGBUF_TYPE_PADDING;
+	/* time delta must be non zero */
+	event->time_delta = 1;
+	/* Account for this as an entry */
+	local_inc(&tail_page->entries);
+	local_inc(&cpu_buffer->entries);
+
+	/* Set write to end of buffer */
+	length = (tail + length) - BUF_PAGE_SIZE;
+	local_sub(length, &tail_page->write);
+}
 
 static struct ring_buffer_event *
 rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
@@ -1179,7 +1199,6 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 {
 	struct buffer_page *next_page, *head_page, *reader_page;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
-	struct ring_buffer_event *event;
 	bool lock_taken = false;
 	unsigned long flags;
 
@@ -1264,26 +1283,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 		cpu_buffer->tail_page->page->time_stamp = *ts;
 	}
 
-	/*
-	 * The actual tail page has moved forward.
-	 */
-	if (tail < BUF_PAGE_SIZE) {
-		/* Mark the rest of the page with padding */
-		event = __rb_page_index(tail_page, tail);
-		rb_event_set_padding(event);
-	}
-
-	/* Set the write back to the previous setting */
-	local_sub(length, &tail_page->write);
-
-	/*
-	 * If this was a commit entry that failed,
-	 * increment that too
-	 */
-	if (tail_page == cpu_buffer->commit_page &&
-	    tail == rb_commit_index(cpu_buffer)) {
-		rb_set_commit_to_write(cpu_buffer);
-	}
+	rb_reset_tail(cpu_buffer, tail_page, tail, length);
 
 	__raw_spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);
@@ -1293,7 +1293,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 
  out_reset:
 	/* reset write */
-	local_sub(length, &tail_page->write);
+	rb_reset_tail(cpu_buffer, tail_page, tail, length);
 
 	if (likely(lock_taken))
 		__raw_spin_unlock(&cpu_buffer->lock);
@@ -1323,9 +1323,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 
 	/* We reserved something on the buffer */
 
-	if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
-		return NULL;
-
 	event = __rb_page_index(tail_page, tail);
 	rb_update_event(event, type, length);
 
@@ -1334,11 +1331,11 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		local_inc(&tail_page->entries);
 
 	/*
-	 * If this is a commit and the tail is zero, then update
-	 * this page's time stamp.
+	 * If this is the first commit on the page, then update
+	 * its timestamp.
 	 */
-	if (!tail && rb_is_commit(cpu_buffer, event))
-		cpu_buffer->commit_page->page->time_stamp = *ts;
+	if (!tail)
+		tail_page->page->time_stamp = *ts;
 
 	return event;
 }
@@ -1407,16 +1404,16 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		return -EAGAIN;
 
 	/* Only a commited time event can update the write stamp */
-	if (rb_is_commit(cpu_buffer, event)) {
+	if (rb_event_is_commit(cpu_buffer, event)) {
 		/*
-		 * If this is the first on the page, then we need to
-		 * update the page itself, and just put in a zero.
+		 * If this is the first on the page, then it was
+		 * updated with the page itself. Try to discard it
+		 * and if we can't just make it zero.
 		 */
 		if (rb_event_index(event)) {
 			event->time_delta = *delta & TS_MASK;
 			event->array[0] = *delta >> TS_SHIFT;
 		} else {
-			cpu_buffer->commit_page->page->time_stamp = *ts;
 			/* try to discard, since we do not need this */
 			if (!rb_try_to_discard(cpu_buffer, event)) {
 				/* nope, just zero it */
@@ -1442,6 +1439,44 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 	return ret;
 }
 
+static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	local_inc(&cpu_buffer->committing);
+	local_inc(&cpu_buffer->commits);
+}
+
+static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	unsigned long commits;
+
+	if (RB_WARN_ON(cpu_buffer,
+		       !local_read(&cpu_buffer->committing)))
+		return;
+
+ again:
+	commits = local_read(&cpu_buffer->commits);
+	/* synchronize with interrupts */
+	barrier();
+	if (local_read(&cpu_buffer->committing) == 1)
+		rb_set_commit_to_write(cpu_buffer);
+
+	local_dec(&cpu_buffer->committing);
+
+	/* synchronize with interrupts */
+	barrier();
+
+	/*
+	 * Need to account for interrupts coming in between the
+	 * updating of the commit page and the clearing of the
+	 * committing counter.
+	 */
+	if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
+	    !local_read(&cpu_buffer->committing)) {
+		local_inc(&cpu_buffer->committing);
+		goto again;
+	}
+}
+
 static struct ring_buffer_event *
 rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 		      unsigned long length)
@@ -1451,6 +1486,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	int commit = 0;
 	int nr_loops = 0;
 
+	rb_start_commit(cpu_buffer);
+
 	length = rb_calculate_event_length(length);
  again:
 	/*
@@ -1463,7 +1500,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	 * Bail!
 	 */
 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
-		return NULL;
+		goto out_fail;
 
 	ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
 
@@ -1494,7 +1531,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
 			commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
 			if (commit == -EBUSY)
-				return NULL;
+				goto out_fail;
 
 			if (commit == -EAGAIN)
 				goto again;
@@ -1508,28 +1545,19 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	if (unlikely(PTR_ERR(event) == -EAGAIN))
 		goto again;
 
-	if (!event) {
-		if (unlikely(commit))
-			/*
-			 * Ouch! We needed a timestamp and it was commited. But
-			 * we didn't get our event reserved.
-			 */
-			rb_set_commit_to_write(cpu_buffer);
-		return NULL;
-	}
+	if (!event)
+		goto out_fail;
 
-	/*
-	 * If the timestamp was commited, make the commit our entry
-	 * now so that we will update it when needed.
-	 */
-	if (unlikely(commit))
-		rb_set_commit_event(cpu_buffer, event);
-	else if (!rb_is_commit(cpu_buffer, event))
+	if (!rb_event_is_commit(cpu_buffer, event))
 		delta = 0;
 
 	event->time_delta = delta;
 
 	return event;
+
+ out_fail:
+	rb_end_commit(cpu_buffer);
+	return NULL;
 }
 
 #define TRACE_RECURSIVE_DEPTH 16
@@ -1639,13 +1667,14 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 {
 	local_inc(&cpu_buffer->entries);
 
-	/* Only process further if we own the commit */
-	if (!rb_is_commit(cpu_buffer, event))
-		return;
-
-	cpu_buffer->write_stamp += event->time_delta;
+	/*
+	 * The event first in the commit queue updates the
+	 * time stamp.
+	 */
+	if (rb_event_is_commit(cpu_buffer, event))
+		cpu_buffer->write_stamp += event->time_delta;
 
-	rb_set_commit_to_write(cpu_buffer);
+	rb_end_commit(cpu_buffer);
 }
 
 /**
@@ -1734,15 +1763,15 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 	/* The event is discarded regardless */
 	rb_event_discard(event);
 
+	cpu = smp_processor_id();
+	cpu_buffer = buffer->buffers[cpu];
+
 	/*
 	 * This must only be called if the event has not been
 	 * committed yet. Thus we can assume that preemption
 	 * is still disabled.
 	 */
-	RB_WARN_ON(buffer, preemptible());
-
-	cpu = smp_processor_id();
-	cpu_buffer = buffer->buffers[cpu];
+	RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
 
 	if (!rb_try_to_discard(cpu_buffer, event))
 		goto out;
@@ -1753,13 +1782,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 	 */
 	local_inc(&cpu_buffer->entries);
  out:
-	/*
-	 * If a write came in and pushed the tail page
-	 * we still need to update the commit pointer
-	 * if we were the commit.
-	 */
-	if (rb_is_commit(cpu_buffer, event))
-		rb_set_commit_to_write(cpu_buffer);
+	rb_end_commit(cpu_buffer);
 
 	trace_recursive_unlock();
 
@@ -2443,6 +2466,21 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
 
+static inline int rb_ok_to_lock(void)
+{
+	/*
+	 * If an NMI die dumps out the content of the ring buffer
+	 * do not grab locks. We also permanently disable the ring
+	 * buffer too. A one time deal is all you get from reading
+	 * the ring buffer from an NMI.
+	 */
+	if (likely(!in_nmi() && !oops_in_progress))
+		return 1;
+
+	tracing_off_permanent();
+	return 0;
+}
+
 /**
  * ring_buffer_peek - peek at the next event to be read
  * @buffer: The ring buffer to read
@@ -2458,14 +2496,20 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct ring_buffer_event *event;
 	unsigned long flags;
+	int dolock;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
+	dolock = rb_ok_to_lock();
  again:
-	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	local_irq_save(flags);
+	if (dolock)
+		spin_lock(&cpu_buffer->reader_lock);
 	event = rb_buffer_peek(buffer, cpu, ts);
-	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	if (dolock)
+		spin_unlock(&cpu_buffer->reader_lock);
+	local_irq_restore(flags);
 
 	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
@@ -2517,6 +2561,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event = NULL;
 	unsigned long flags;
+	int dolock;
+
+	dolock = rb_ok_to_lock();
 
  again:
 	/* might be called in atomic */
@@ -2526,7 +2573,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 		goto out;
 
 	cpu_buffer = buffer->buffers[cpu];
-	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	local_irq_save(flags);
+	if (dolock)
+		spin_lock(&cpu_buffer->reader_lock);
 
 	event = rb_buffer_peek(buffer, cpu, ts);
 	if (!event)
@@ -2535,7 +2584,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 	rb_advance_reader(cpu_buffer);
 
  out_unlock:
-	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	if (dolock)
+		spin_unlock(&cpu_buffer->reader_lock);
+	local_irq_restore(flags);
 
  out:
 	preempt_enable();
@@ -2677,6 +2728,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->overrun = 0;
 	cpu_buffer->read = 0;
 	local_set(&cpu_buffer->entries, 0);
+	local_set(&cpu_buffer->committing, 0);
+	local_set(&cpu_buffer->commits, 0);
 
 	cpu_buffer->write_stamp = 0;
 	cpu_buffer->read_stamp = 0;
@@ -2731,12 +2784,25 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset);
 int ring_buffer_empty(struct ring_buffer *buffer)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long flags;
+	int dolock;
 	int cpu;
+	int ret;
+
+	dolock = rb_ok_to_lock();
 
 	/* yes this is racy, but if you don't like the race, lock the buffer */
 	for_each_buffer_cpu(buffer, cpu) {
 		cpu_buffer = buffer->buffers[cpu];
-		if (!rb_per_cpu_empty(cpu_buffer))
+		local_irq_save(flags);
+		if (dolock)
+			spin_lock(&cpu_buffer->reader_lock);
+		ret = rb_per_cpu_empty(cpu_buffer);
+		if (dolock)
+			spin_unlock(&cpu_buffer->reader_lock);
+		local_irq_restore(flags);
+
+		if (!ret)
 			return 0;
 	}
 
@@ -2752,14 +2818,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty);
 int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long flags;
+	int dolock;
 	int ret;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 1;
 
+	dolock = rb_ok_to_lock();
+
 	cpu_buffer = buffer->buffers[cpu];
+	local_irq_save(flags);
+	if (dolock)
+		spin_lock(&cpu_buffer->reader_lock);
 	ret = rb_per_cpu_empty(cpu_buffer);
-
+	if (dolock)
+		spin_unlock(&cpu_buffer->reader_lock);
+	local_irq_restore(flags);
 
 	return ret;
 }
@@ -3105,7 +3180,7 @@ static int rb_cpu_notify(struct notifier_block *self,
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		if (cpu_isset(cpu, *buffer->cpumask))
+		if (cpumask_test_cpu(cpu, buffer->cpumask))
 			return NOTIFY_OK;
 
 		buffer->buffers[cpu] =
@@ -3116,7 +3191,7 @@ static int rb_cpu_notify(struct notifier_block *self,
 			return NOTIFY_OK;
 		}
 		smp_wmb();
-		cpu_set(cpu, *buffer->cpumask);
+		cpumask_set_cpu(cpu, buffer->cpumask);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 8d68e14..573d3cc 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -102,8 +102,10 @@ static enum event_status read_page(int cpu)
 			event = (void *)&rpage->data[i];
 			switch (event->type_len) {
 			case RINGBUF_TYPE_PADDING:
-				/* We don't expect any padding */
-				KILL_TEST();
+				/* failed writes may be discarded events */
+				if (!event->time_delta)
+					KILL_TEST();
+				inc = event->array[0] + 4;
 				break;
 			case RINGBUF_TYPE_TIME_EXTEND:
 				inc = 8;
@@ -119,7 +121,7 @@ static enum event_status read_page(int cpu)
 					KILL_TEST();
 					break;
 				}
-				inc = event->array[0];
+				inc = event->array[0] + 4;
 				break;
 			default:
 				entry = ring_buffer_event_data(event);
@@ -201,7 +203,7 @@ static void ring_buffer_producer(void)
 	 * Hammer the buffer for 10 secs (this may
 	 * make the system stall)
 	 */
-	pr_info("Starting ring buffer hammer\n");
+	trace_printk("Starting ring buffer hammer\n");
 	do_gettimeofday(&start_tv);
 	do {
 		struct ring_buffer_event *event;
@@ -237,7 +239,7 @@ static void ring_buffer_producer(void)
 #endif
 
 	} while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
-	pr_info("End ring buffer hammer\n");
+	trace_printk("End ring buffer hammer\n");
 
 	if (consumer) {
 		/* Init both completions here to avoid races */
@@ -260,49 +262,50 @@ static void ring_buffer_producer(void)
 	overruns = ring_buffer_overruns(buffer);
 
 	if (kill_test)
-		pr_info("ERROR!\n");
-	pr_info("Time:     %lld (usecs)\n", time);
-	pr_info("Overruns: %lld\n", overruns);
+		trace_printk("ERROR!\n");
+	trace_printk("Time:     %lld (usecs)\n", time);
+	trace_printk("Overruns: %lld\n", overruns);
 	if (disable_reader)
-		pr_info("Read:     (reader disabled)\n");
+		trace_printk("Read:     (reader disabled)\n");
 	else
-		pr_info("Read:     %ld  (by %s)\n", read,
+		trace_printk("Read:     %ld  (by %s)\n", read,
 			read_events ? "events" : "pages");
-	pr_info("Entries:  %lld\n", entries);
-	pr_info("Total:    %lld\n", entries + overruns + read);
-	pr_info("Missed:   %ld\n", missed);
-	pr_info("Hit:      %ld\n", hit);
+	trace_printk("Entries:  %lld\n", entries);
+	trace_printk("Total:    %lld\n", entries + overruns + read);
+	trace_printk("Missed:   %ld\n", missed);
+	trace_printk("Hit:      %ld\n", hit);
 
 	/* Convert time from usecs to millisecs */
 	do_div(time, USEC_PER_MSEC);
 	if (time)
 		hit /= (long)time;
 	else
-		pr_info("TIME IS ZERO??\n");
+		trace_printk("TIME IS ZERO??\n");
 
-	pr_info("Entries per millisec: %ld\n", hit);
+	trace_printk("Entries per millisec: %ld\n", hit);
 
 	if (hit) {
 		/* Calculate the average time in nanosecs */
 		avg = NSEC_PER_MSEC / hit;
-		pr_info("%ld ns per entry\n", avg);
+		trace_printk("%ld ns per entry\n", avg);
 	}
 
 	if (missed) {
 		if (time)
 			missed /= (long)time;
 
-		pr_info("Total iterations per millisec: %ld\n", hit + missed);
+		trace_printk("Total iterations per millisec: %ld\n",
+			     hit + missed);
 
 		/* it is possible that hit + missed will overflow and be zero */
 		if (!(hit + missed)) {
-			pr_info("hit + missed overflowed and totalled zero!\n");
+			trace_printk("hit + missed overflowed and totalled zero!\n");
 			hit--; /* make it non zero */
 		}
 
 		/* Caculate the average time in nanosecs */
 		avg = NSEC_PER_MSEC / (hit + missed);
-		pr_info("%ld ns per entry\n", avg);
+		trace_printk("%ld ns per entry\n", avg);
 	}
 }
 
@@ -353,7 +356,7 @@ static int ring_buffer_producer_thread(void *arg)
 
 		ring_buffer_producer();
 
-		pr_info("Sleeping for 10 secs\n");
+		trace_printk("Sleeping for 10 secs\n");
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule_timeout(HZ * SLEEP_TIME);
 		__set_current_state(TASK_RUNNING);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8acd9b8..0f57f1b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2191,11 +2191,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
 		return -ENOMEM;
 
-	mutex_lock(&tracing_cpumask_update_lock);
 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
 	if (err)
 		goto err_unlock;
 
+	mutex_lock(&tracing_cpumask_update_lock);
+
 	local_irq_disable();
 	__raw_spin_lock(&ftrace_max_lock);
 	for_each_tracing_cpu(cpu) {
@@ -2223,8 +2224,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 	return count;
 
 err_unlock:
-	mutex_unlock(&tracing_cpumask_update_lock);
-	free_cpumask_var(tracing_cpumask);
+	free_cpumask_var(tracing_cpumask_new);
 
 	return err;
 }
@@ -3627,7 +3627,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 	struct trace_seq *s;
 	unsigned long cnt;
 
-	s = kmalloc(sizeof(*s), GFP_ATOMIC);
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
 		return ENOMEM;
 
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index db6e54b..936c621 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -27,8 +27,6 @@
 #include "trace.h"
 #include "trace_output.h"
 
-static DEFINE_MUTEX(filter_mutex);
-
 enum filter_op_ids
 {
 	OP_OR,
@@ -178,7 +176,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
 static int filter_pred_strloc(struct filter_pred *pred, void *event,
 			      int val1, int val2)
 {
-	int str_loc = *(int *)(event + pred->offset);
+	unsigned short str_loc = *(unsigned short *)(event + pred->offset);
 	char *addr = (char *)(event + str_loc);
 	int cmp, match;
 
@@ -294,12 +292,12 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
 {
 	struct event_filter *filter = call->filter;
 
-	mutex_lock(&filter_mutex);
+	mutex_lock(&event_mutex);
 	if (filter->filter_string)
 		trace_seq_printf(s, "%s\n", filter->filter_string);
 	else
 		trace_seq_printf(s, "none\n");
-	mutex_unlock(&filter_mutex);
+	mutex_unlock(&event_mutex);
 }
 
 void print_subsystem_event_filter(struct event_subsystem *system,
@@ -307,12 +305,12 @@ void print_subsystem_event_filter(struct event_subsystem *system,
 {
 	struct event_filter *filter = system->filter;
 
-	mutex_lock(&filter_mutex);
+	mutex_lock(&event_mutex);
 	if (filter->filter_string)
 		trace_seq_printf(s, "%s\n", filter->filter_string);
 	else
 		trace_seq_printf(s, "none\n");
-	mutex_unlock(&filter_mutex);
+	mutex_unlock(&event_mutex);
 }
 
 static struct ftrace_event_field *
@@ -381,6 +379,7 @@ void destroy_preds(struct ftrace_event_call *call)
 			filter_free_pred(filter->preds[i]);
 	}
 	kfree(filter->preds);
+	kfree(filter->filter_string);
 	kfree(filter);
 	call->filter = NULL;
 }
@@ -433,7 +432,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
 		filter->n_preds = 0;
 	}
 
-	mutex_lock(&event_mutex);
 	list_for_each_entry(call, &ftrace_events, list) {
 		if (!call->define_fields)
 			continue;
@@ -443,7 +441,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
 			remove_filter_string(call->filter);
 		}
 	}
-	mutex_unlock(&event_mutex);
 }
 
 static int filter_add_pred_fn(struct filter_parse_state *ps,
@@ -546,6 +543,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
 	filter_pred_fn_t fn;
 	unsigned long long val;
 	int string_type;
+	int ret;
 
 	pred->fn = filter_pred_none;
 
@@ -581,7 +579,11 @@ static int filter_add_pred(struct filter_parse_state *ps,
 			pred->not = 1;
 		return filter_add_pred_fn(ps, call, pred, fn);
 	} else {
-		if (strict_strtoull(pred->str_val, 0, &val)) {
+		if (field->is_signed)
+			ret = strict_strtoll(pred->str_val, 0, &val);
+		else
+			ret = strict_strtoull(pred->str_val, 0, &val);
+		if (ret) {
 			parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
 			return -EINVAL;
 		}
@@ -625,7 +627,6 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
 	filter->preds[filter->n_preds] = pred;
 	filter->n_preds++;
 
-	mutex_lock(&event_mutex);
 	list_for_each_entry(call, &ftrace_events, list) {
 
 		if (!call->define_fields)
@@ -636,14 +637,12 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
 
 		err = filter_add_pred(ps, call, pred);
 		if (err) {
-			mutex_unlock(&event_mutex);
 			filter_free_subsystem_preds(system);
 			parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
 			goto out;
 		}
 		replace_filter_string(call->filter, filter_string);
 	}
-	mutex_unlock(&event_mutex);
 out:
 	return err;
 }
@@ -1070,12 +1069,12 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 
 	struct filter_parse_state *ps;
 
-	mutex_lock(&filter_mutex);
+	mutex_lock(&event_mutex);
 
 	if (!strcmp(strstrip(filter_string), "0")) {
 		filter_disable_preds(call);
 		remove_filter_string(call->filter);
-		mutex_unlock(&filter_mutex);
+		mutex_unlock(&event_mutex);
 		return 0;
 	}
 
@@ -1103,7 +1102,7 @@ out:
 	postfix_clear(ps);
 	kfree(ps);
 out_unlock:
-	mutex_unlock(&filter_mutex);
+	mutex_unlock(&event_mutex);
 
 	return err;
 }
@@ -1115,12 +1114,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
 
 	struct filter_parse_state *ps;
 
-	mutex_lock(&filter_mutex);
+	mutex_lock(&event_mutex);
 
 	if (!strcmp(strstrip(filter_string), "0")) {
 		filter_free_subsystem_preds(system);
 		remove_filter_string(system->filter);
-		mutex_unlock(&filter_mutex);
+		mutex_unlock(&event_mutex);
 		return 0;
 	}
 
@@ -1148,7 +1147,7 @@ out:
 	postfix_clear(ps);
 	kfree(ps);
 out_unlock:
-	mutex_unlock(&filter_mutex);
+	mutex_unlock(&event_mutex);
 
 	return err;
 }
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index c9a0b7d..90f1347 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -193,9 +193,11 @@ static void tracing_start_function_trace(void)
 static void tracing_stop_function_trace(void)
 {
 	ftrace_function_enabled = 0;
-	/* OK if they are not registered */
-	unregister_ftrace_function(&trace_stack_ops);
-	unregister_ftrace_function(&trace_ops);
+
+	if (func_flags.val & TRACE_FUNC_OPT_STACK)
+		unregister_ftrace_function(&trace_stack_ops);
+	else
+		unregister_ftrace_function(&trace_ops);
 }
 
 static int func_set_flag(u32 old_flags, u32 bit, int set)
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 8b59241..d2249ab 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -57,7 +57,8 @@ static struct tracer_flags tracer_flags = {
 
 /* Add a function return address to the trace stack on thread info.*/
 int
-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
+ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
+			 unsigned long frame_pointer)
 {
 	unsigned long long calltime;
 	int index;
@@ -85,6 +86,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
 	current->ret_stack[index].func = func;
 	current->ret_stack[index].calltime = calltime;
 	current->ret_stack[index].subtime = 0;
+	current->ret_stack[index].fp = frame_pointer;
 	*depth = index;
 
 	return 0;
@@ -92,7 +94,8 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
 
 /* Retrieve a function return address to the trace stack on thread info.*/
 static void
-ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
+ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
+			unsigned long frame_pointer)
 {
 	int index;
 
@@ -106,6 +109,31 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
 		return;
 	}
 
+#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
+	/*
+	 * The arch may choose to record the frame pointer used
+	 * and check it here to make sure that it is what we expect it
+	 * to be. If gcc does not set the place holder of the return
+	 * address in the frame pointer, and does a copy instead, then
+	 * the function graph trace will fail. This test detects this
+	 * case.
+	 *
+	 * Currently, x86_32 with optimize for size (-Os) makes the latest
+	 * gcc do the above.
+	 */
+	if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
+		ftrace_graph_stop();
+		WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
+		     "  from func %pF return to %lx\n",
+		     current->ret_stack[index].fp,
+		     frame_pointer,
+		     (void *)current->ret_stack[index].func,
+		     current->ret_stack[index].ret);
+		*ret = (unsigned long)panic;
+		return;
+	}
+#endif
+
 	*ret = current->ret_stack[index].ret;
 	trace->func = current->ret_stack[index].func;
 	trace->calltime = current->ret_stack[index].calltime;
@@ -117,12 +145,12 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
  * Send the trace to the ring-buffer.
  * @return the original return address.
  */
-unsigned long ftrace_return_to_handler(void)
+unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 {
 	struct ftrace_graph_ret trace;
 	unsigned long ret;
 
-	ftrace_pop_return_trace(&trace, &ret);
+	ftrace_pop_return_trace(&trace, &ret, frame_pointer);
 	trace.rettime = trace_clock_local();
 	ftrace_graph_return(&trace);
 	barrier();
diff --git a/samples/trace_events/Makefile b/samples/trace_events/Makefile
index 0d428dc..0f8d921 100644
--- a/samples/trace_events/Makefile
+++ b/samples/trace_events/Makefile
@@ -1,6 +1,14 @@
 # builds the trace events example kernel modules;
 # then to use one (as root):  insmod <module_name.ko>
 
+# If you include a trace header outside of include/trace/events
+# then the file that does the #define CREATE_TRACE_POINTS must
+# have that tracer file in its main search path. This is because
+# define_trace.h will include it, and must be able to find it from
+# the include/trace directory.
+#
+# Here trace-events-sample.c does the CREATE_TRACE_POINTS.
+#
 CFLAGS_trace-events-sample.o := -I$(src)
 
 obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index 128a897..9977a75 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -19,16 +19,21 @@
  * If TRACE_SYSTEM is defined, that will be the directory created
  * in the ftrace directory under /debugfs/tracing/events/<system>
  *
- * The define_trace.h belowe will also look for a file name of
+ * The define_trace.h below will also look for a file name of
  * TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here.
+ * In this case, it would look for sample.h
  *
- * If you want a different system than file name, you can override
- * the header name by defining TRACE_INCLUDE_FILE
+ * If the header name will be different than the system name
+ * (as in this case), then you can override the header name that
+ * define_trace.h will look up by defining TRACE_INCLUDE_FILE
  *
- * If this file was called, goofy.h, then we would define:
+ * This file is called trace-events-sample.h but we want the system
+ * to be called "sample". Therefore we must define the name of this
+ * file:
  *
- * #define TRACE_INCLUDE_FILE goofy
+ * #define TRACE_INCLUDE_FILE trace-events-sample
  *
+ * As we do an the bottom of this file.
  */
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM sample
@@ -99,13 +104,13 @@ TRACE_EVENT(foo_bar,
  *
  * #define TRACE_INCLUDE_PATH ../../samples/trace_events
  *
- * But I chose to simply make it use the current directory and then in
- * the Makefile I added:
+ * But the safest and easiest way to simply make it use the directory
+ * that the file is in is to add in the Makefile:
  *
- * CFLAGS_trace-events-sample.o := -I$(PWD)/samples/trace_events/
+ * CFLAGS_trace-events-sample.o := -I$(src)
  *
  * This will make sure the current path is part of the include
- * structure for our file so that we can find it.
+ * structure for our file so that define_trace.h can find it.
  *
  * I could have made only the top level directory the include:
  *
@@ -115,8 +120,8 @@ TRACE_EVENT(foo_bar,
  *
  * #define TRACE_INCLUDE_PATH samples/trace_events
  *
- * But then if something defines "samples" or "trace_events" then we
- * could risk that being converted too, and give us an unexpected
+ * But then if something defines "samples" or "trace_events" as a macro
+ * then we could risk that being converted too, and give us an unexpected
  * result.
  */
 #undef TRACE_INCLUDE_PATH

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-05-18 14:29 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-05-18 14:29 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Andrew Morton, Steven Rostedt

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
GeunSik Lim (1):
      tracing: Append prompt in /debug/tracing/README file

Steven Rostedt (1):
      x86/function-graph: fix constraint for recording old return value


 arch/x86/kernel/ftrace.c |    2 +-
 kernel/trace/trace.c     |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 18dfa30..b79c553 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -442,7 +442,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		_ASM_EXTABLE(1b, 4b)
 		_ASM_EXTABLE(2b, 4b)
 
-		: [old] "=r" (old), [faulted] "=r" (faulted)
+		: [old] "=&r" (old), [faulted] "=r" (faulted)
 		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
 		: "memory"
 	);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a884c09..cda81ec 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2380,7 +2380,7 @@ static const char readme_msg[] =
 	"# echo print-parent > /debug/tracing/trace_options\n"
 	"# echo 1 > /debug/tracing/tracing_enabled\n"
 	"# cat /debug/tracing/trace > /tmp/trace.txt\n"
-	"echo 0 > /debug/tracing/tracing_enabled\n"
+	"# echo 0 > /debug/tracing/tracing_enabled\n"
 ;
 
 static ssize_t

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-05-05  9:31 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-05-05  9:31 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Steven Rostedt, Andrew Morton

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Steven Rostedt (1):
      tracing: fix ref count in splice pages

Stuart Bennett (1):
      tracing: x86, mmiotrace: fix range test


 arch/x86/mm/kmmio.c  |    2 +-
 kernel/trace/trace.c |    1 +
 2 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 4f115e0..50dc802 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -87,7 +87,7 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
 {
 	struct kmmio_probe *p;
 	list_for_each_entry_rcu(p, &kmmio_probes, list) {
-		if (addr >= p->addr && addr <= (p->addr + p->len))
+		if (addr >= p->addr && addr < (p->addr + p->len))
 			return p;
 	}
 	return NULL;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1ce5dc6..a884c09 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3448,6 +3448,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		if (!ref)
 			break;
 
+		ref->ref = 1;
 		ref->buffer = info->tr->buffer;
 		ref->page = ring_buffer_alloc_read_page(ref->buffer);
 		if (!ref->page) {

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-04-17  1:01 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-04-17  1:01 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Andrew Morton, Steven Rostedt

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Zhaolei (2):
      tracing: Fix power tracer header
      tracing: Fix branch tracer header


 kernel/trace/trace_branch.c |    8 ++++++++
 kernel/trace/trace_power.c  |    7 +++++++
 2 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index ad8c22e..8333715 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -155,6 +155,13 @@ static enum print_line_t trace_branch_print(struct trace_iterator *iter,
 	return TRACE_TYPE_HANDLED;
 }
 
+static void branch_print_header(struct seq_file *s)
+{
+	seq_puts(s, "#           TASK-PID    CPU#    TIMESTAMP  CORRECT"
+		"  FUNC:FILE:LINE\n");
+	seq_puts(s, "#              | |       |          |         |   "
+		"    |\n");
+}
 
 static struct trace_event trace_branch_event = {
 	.type		= TRACE_BRANCH,
@@ -169,6 +176,7 @@ static struct tracer branch_trace __read_mostly =
 #ifdef CONFIG_FTRACE_SELFTEST
 	.selftest	= trace_selftest_startup_branch,
 #endif /* CONFIG_FTRACE_SELFTEST */
+	.print_header	= branch_print_header,
 };
 
 __init static int init_branch_tracer(void)
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index bae791e..1184397 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -186,6 +186,12 @@ static enum print_line_t power_print_line(struct trace_iterator *iter)
 	return TRACE_TYPE_UNHANDLED;
 }
 
+static void power_print_header(struct seq_file *s)
+{
+	seq_puts(s, "#   TIMESTAMP      STATE  EVENT\n");
+	seq_puts(s, "#       |            |      |\n");
+}
+
 static struct tracer power_tracer __read_mostly =
 {
 	.name		= "power",
@@ -194,6 +200,7 @@ static struct tracer power_tracer __read_mostly =
 	.stop		= stop_power_trace,
 	.reset		= power_trace_reset,
 	.print_line	= power_print_line,
+	.print_header	= power_print_header,
 };
 
 static int init_power_trace(void)

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-04-13 17:32 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-04-13 17:32 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Andrew Morton, Steven Rostedt

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

[ Note the extension to include/linux/stringify.h - it's safe but 
  touches a generic header. ]

 Thanks,

	Ingo

------------------>
Lai Jiangshan (4):
      tracing: disable seeking for trace_pipe_raw
      tracing: allocate page when needed
      tracing: update file->f_pos when splice(2) it
      tracing: fix splice return too large

Li Zefan (5):
      tracing: fix document references
      tracing/filters: NIL-terminate user input filter
      tracing/filters: fix NULL pointer dereference
      tracing/filters: allow user input integer to be oct or hex
      tracing/filters: return proper error code when writing filter file

Zhaolei (2):
      Make __stringify support variable argument macros too
      ftrace: Output REC->var instead of __entry->var for trace format


 include/linux/stringify.h           |    4 +-
 kernel/trace/Kconfig                |    4 +-
 kernel/trace/trace.c                |   36 ++++++++++++++++++++--------------
 kernel/trace/trace_events.c         |   12 +++++++---
 kernel/trace/trace_events_filter.c  |   14 +++++++++---
 kernel/trace/trace_events_stage_2.h |    4 +-
 6 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/include/linux/stringify.h b/include/linux/stringify.h
index 0b43883..841cec8 100644
--- a/include/linux/stringify.h
+++ b/include/linux/stringify.h
@@ -6,7 +6,7 @@
  * converts to "bar".
  */
 
-#define __stringify_1(x)	#x
-#define __stringify(x)		__stringify_1(x)
+#define __stringify_1(x...)	#x
+#define __stringify(x...)	__stringify_1(x)
 
 #endif	/* !__LINUX_STRINGIFY_H */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2246141..417d198 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -312,7 +312,7 @@ config KMEMTRACE
 	  and profile kernel code.
 
 	  This requires an userspace application to use. See
-	  Documentation/vm/kmemtrace.txt for more information.
+	  Documentation/trace/kmemtrace.txt for more information.
 
 	  Saying Y will make the kernel somewhat larger and slower. However,
 	  if you disable kmemtrace at run-time or boot-time, the performance
@@ -403,7 +403,7 @@ config MMIOTRACE
 	  implementation and works via page faults. Tracing is disabled by
 	  default and can be enabled at run-time.
 
-	  See Documentation/tracers/mmiotrace.txt.
+	  See Documentation/trace/mmiotrace.txt.
 	  If you are not helping to develop drivers, say N.
 
 config MMIOTRACE_TEST
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9d28476..1ce5dc6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3277,19 +3277,13 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
 
 	info->tr	= &global_trace;
 	info->cpu	= cpu;
-	info->spare	= ring_buffer_alloc_read_page(info->tr->buffer);
+	info->spare	= NULL;
 	/* Force reading ring buffer for first read */
 	info->read	= (unsigned int)-1;
-	if (!info->spare)
-		goto out;
 
 	filp->private_data = info;
 
-	return 0;
-
- out:
-	kfree(info);
-	return -ENOMEM;
+	return nonseekable_open(inode, filp);
 }
 
 static ssize_t
@@ -3304,6 +3298,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 	if (!count)
 		return 0;
 
+	if (!info->spare)
+		info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
+	if (!info->spare)
+		return -ENOMEM;
+
 	/* Do we have previous read data to read? */
 	if (info->read < PAGE_SIZE)
 		goto read;
@@ -3342,7 +3341,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
 {
 	struct ftrace_buffer_info *info = file->private_data;
 
-	ring_buffer_free_read_page(info->tr->buffer, info->spare);
+	if (info->spare)
+		ring_buffer_free_read_page(info->tr->buffer, info->spare);
 	kfree(info);
 
 	return 0;
@@ -3428,14 +3428,19 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 	int size, i;
 	size_t ret;
 
-	/*
-	 * We can't seek on a buffer input
-	 */
-	if (unlikely(*ppos))
-		return -ESPIPE;
+	if (*ppos & (PAGE_SIZE - 1)) {
+		WARN_ONCE(1, "Ftrace: previous read must page-align\n");
+		return -EINVAL;
+	}
 
+	if (len & (PAGE_SIZE - 1)) {
+		WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
+		if (len < PAGE_SIZE)
+			return -EINVAL;
+		len &= PAGE_MASK;
+	}
 
-	for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) {
+	for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) {
 		struct page *page;
 		int r;
 
@@ -3474,6 +3479,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		spd.partial[i].offset = 0;
 		spd.partial[i].private = (unsigned long)ref;
 		spd.nr_pages++;
+		*ppos += PAGE_SIZE;
 	}
 
 	spd.nr_pages = i;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 64ec4d2..576f4fa 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -503,6 +503,7 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 
 	if (copy_from_user(&buf, ubuf, cnt))
 		return -EFAULT;
+	buf[cnt] = '\0';
 
 	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
 	if (!pred)
@@ -520,9 +521,10 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 		return cnt;
 	}
 
-	if (filter_add_pred(call, pred)) {
+	err = filter_add_pred(call, pred);
+	if (err < 0) {
 		filter_free_pred(pred);
-		return -EINVAL;
+		return err;
 	}
 
 	*ppos += cnt;
@@ -569,6 +571,7 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 
 	if (copy_from_user(&buf, ubuf, cnt))
 		return -EFAULT;
+	buf[cnt] = '\0';
 
 	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
 	if (!pred)
@@ -586,10 +589,11 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 		return cnt;
 	}
 
-	if (filter_add_subsystem_pred(system, pred)) {
+	err = filter_add_subsystem_pred(system, pred);
+	if (err < 0) {
 		filter_free_subsystem_preds(system);
 		filter_free_pred(pred);
-		return -EINVAL;
+		return err;
 	}
 
 	*ppos += cnt;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 026be41..e03cbf1 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -215,7 +215,7 @@ static int __filter_add_pred(struct ftrace_event_call *call,
 		}
 	}
 
-	return -ENOMEM;
+	return -ENOSPC;
 }
 
 static int is_string_field(const char *type)
@@ -319,7 +319,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
 	}
 
 	if (i == MAX_FILTER_PRED)
-		return -EINVAL;
+		return -ENOSPC;
 
 	events_for_each(call) {
 		int err;
@@ -410,16 +410,22 @@ int filter_parse(char **pbuf, struct filter_pred *pred)
 		}
 	}
 
+	if (!val_str) {
+		pred->field_name = NULL;
+		return -EINVAL;
+	}
+
 	pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
 	if (!pred->field_name)
 		return -ENOMEM;
 
-	pred->val = simple_strtoull(val_str, &tmp, 10);
+	pred->val = simple_strtoull(val_str, &tmp, 0);
 	if (tmp == val_str) {
 		pred->str_val = kstrdup(val_str, GFP_KERNEL);
 		if (!pred->str_val)
 			return -ENOMEM;
-	}
+	} else if (*tmp != '\0')
+		return -EINVAL;
 
 	return 0;
 }
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
index 30743f7..d363c66 100644
--- a/kernel/trace/trace_events_stage_2.h
+++ b/kernel/trace/trace_events_stage_2.h
@@ -105,10 +105,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 		return 0;
 
 #undef __entry
-#define __entry "REC"
+#define __entry REC
 
 #undef TP_printk
-#define TP_printk(fmt, args...) "%s, %s\n", #fmt, #args
+#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
 
 #undef TP_fast_assign
 #define TP_fast_assign(args...) args

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-04-09 15:45 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-04-09 15:45 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Andrew Morton, Steven Rostedt

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Frederic Weisbecker (1):
      tracing/syscalls: use a dedicated file header

Li Zefan (2):
      blktrace: pass the right pointer to kfree()
      tracing: consolidate documents

Tetsuo Handa (1):
      tracing: append a comma to INIT_FTRACE_GRAPH


 Documentation/{ => trace}/ftrace.txt           |    0
 Documentation/{vm => trace}/kmemtrace.txt      |    0
 Documentation/{tracers => trace}/mmiotrace.txt |    0
 Documentation/{ => trace}/tracepoints.txt      |    0
 arch/x86/kernel/ftrace.c                       |    2 +
 arch/x86/kernel/ptrace.c                       |    3 +-
 include/linux/ftrace.h                         |   31 +--------------------
 include/linux/syscalls.h                       |    2 +-
 include/trace/syscall.h                        |   35 ++++++++++++++++++++++++
 kernel/trace/blktrace.c                        |   10 +++---
 kernel/trace/trace_syscalls.c                  |    2 +-
 11 files changed, 47 insertions(+), 38 deletions(-)
 rename Documentation/{ => trace}/ftrace.txt (100%)
 rename Documentation/{vm => trace}/kmemtrace.txt (100%)
 rename Documentation/{tracers => trace}/mmiotrace.txt (100%)
 rename Documentation/{ => trace}/tracepoints.txt (100%)
 create mode 100644 include/trace/syscall.h

diff --git a/Documentation/ftrace.txt b/Documentation/trace/ftrace.txt
similarity index 100%
rename from Documentation/ftrace.txt
rename to Documentation/trace/ftrace.txt
diff --git a/Documentation/vm/kmemtrace.txt b/Documentation/trace/kmemtrace.txt
similarity index 100%
rename from Documentation/vm/kmemtrace.txt
rename to Documentation/trace/kmemtrace.txt
diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/trace/mmiotrace.txt
similarity index 100%
rename from Documentation/tracers/mmiotrace.txt
rename to Documentation/trace/mmiotrace.txt
diff --git a/Documentation/tracepoints.txt b/Documentation/trace/tracepoints.txt
similarity index 100%
rename from Documentation/tracepoints.txt
rename to Documentation/trace/tracepoints.txt
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 70a10ca..18dfa30 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -18,6 +18,8 @@
 #include <linux/init.h>
 #include <linux/list.h>
 
+#include <trace/syscall.h>
+
 #include <asm/cacheflush.h>
 #include <asm/ftrace.h>
 #include <asm/nops.h>
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index fe9345c..23b7c8f 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,7 +21,6 @@
 #include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <linux/signal.h>
-#include <linux/ftrace.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -35,6 +34,8 @@
 #include <asm/proto.h>
 #include <asm/ds.h>
 
+#include <trace/syscall.h>
+
 #include "tls.h"
 
 enum x86_regset {
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index da5405d..8a0c2f2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -357,7 +357,7 @@ struct ftrace_graph_ret {
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
 /* for init task */
-#define INIT_FTRACE_GRAPH		.ret_stack = NULL
+#define INIT_FTRACE_GRAPH		.ret_stack = NULL,
 
 /*
  * Stack of return addresses for functions
@@ -511,33 +511,4 @@ static inline void trace_hw_branch_oops(void) {}
 
 #endif /* CONFIG_HW_BRANCH_TRACER */
 
-/*
- * A syscall entry in the ftrace syscalls array.
- *
- * @name: name of the syscall
- * @nb_args: number of parameters it takes
- * @types: list of types as strings
- * @args: list of args as strings (args[i] matches types[i])
- */
-struct syscall_metadata {
-	const char	*name;
-	int		nb_args;
-	const char	**types;
-	const char	**args;
-};
-
-#ifdef CONFIG_FTRACE_SYSCALLS
-extern void arch_init_ftrace_syscalls(void);
-extern struct syscall_metadata *syscall_nr_to_meta(int nr);
-extern void start_ftrace_syscalls(void);
-extern void stop_ftrace_syscalls(void);
-extern void ftrace_syscall_enter(struct pt_regs *regs);
-extern void ftrace_syscall_exit(struct pt_regs *regs);
-#else
-static inline void start_ftrace_syscalls(void) { }
-static inline void stop_ftrace_syscalls(void) { }
-static inline void ftrace_syscall_enter(struct pt_regs *regs) { }
-static inline void ftrace_syscall_exit(struct pt_regs *regs) { }
-#endif
-
 #endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 6470f74..dabe4ad 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -65,7 +65,7 @@ struct old_linux_dirent;
 #include <asm/signal.h>
 #include <linux/quota.h>
 #include <linux/key.h>
-#include <linux/ftrace.h>
+#include <trace/syscall.h>
 
 #define __SC_DECL1(t1, a1)	t1 a1
 #define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__)
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
new file mode 100644
index 0000000..8cfe515
--- /dev/null
+++ b/include/trace/syscall.h
@@ -0,0 +1,35 @@
+#ifndef _TRACE_SYSCALL_H
+#define _TRACE_SYSCALL_H
+
+#include <asm/ptrace.h>
+
+/*
+ * A syscall entry in the ftrace syscalls array.
+ *
+ * @name: name of the syscall
+ * @nb_args: number of parameters it takes
+ * @types: list of types as strings
+ * @args: list of args as strings (args[i] matches types[i])
+ */
+struct syscall_metadata {
+	const char	*name;
+	int		nb_args;
+	const char	**types;
+	const char	**args;
+};
+
+#ifdef CONFIG_FTRACE_SYSCALLS
+extern void arch_init_ftrace_syscalls(void);
+extern struct syscall_metadata *syscall_nr_to_meta(int nr);
+extern void start_ftrace_syscalls(void);
+extern void stop_ftrace_syscalls(void);
+extern void ftrace_syscall_enter(struct pt_regs *regs);
+extern void ftrace_syscall_exit(struct pt_regs *regs);
+#else
+static inline void start_ftrace_syscalls(void)			{ }
+static inline void stop_ftrace_syscalls(void)			{ }
+static inline void ftrace_syscall_enter(struct pt_regs *regs)	{ }
+static inline void ftrace_syscall_exit(struct pt_regs *regs)	{ }
+#endif
+
+#endif /* _TRACE_SYSCALL_H */
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b32ff44..921ef5d 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1377,12 +1377,12 @@ static int blk_trace_str2mask(const char *str)
 {
 	int i;
 	int mask = 0;
-	char *s, *token;
+	char *buf, *s, *token;
 
-	s = kstrdup(str, GFP_KERNEL);
-	if (s == NULL)
+	buf = kstrdup(str, GFP_KERNEL);
+	if (buf == NULL)
 		return -ENOMEM;
-	s = strstrip(s);
+	s = strstrip(buf);
 
 	while (1) {
 		token = strsep(&s, ",");
@@ -1403,7 +1403,7 @@ static int blk_trace_str2mask(const char *str)
 			break;
 		}
 	}
-	kfree(s);
+	kfree(buf);
 
 	return mask;
 }
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index a2a3af2..5e57964 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,5 +1,5 @@
+#include <trace/syscall.h>
 #include <linux/kernel.h>
-#include <linux/ftrace.h>
 #include <asm/syscall.h>
 
 #include "trace_output.h"

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [GIT PULL] tracing fixes
@ 2009-04-07 19:23 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-04-07 19:23 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Andrew Morton, Steven Rostedt

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Bart Van Assche (1):
      branch tracer: Fix for enabling branch profiling makes sparse unusable

Carl Henrik Lunde (1):
      blktrace: NUL-terminate user space messages

Frederic Weisbecker (2):
      tracing/ftrace: fix missing include string.h
      tracing/ftrace: alloc the started cpumask for the trace file

Huang Weiyi (1):
      tracing, x86: remove duplicated #include

Lai Jiangshan (2):
      tracing: move scripts/trace/power.pl to scripts/tracing/power.pl
      tracing: fix incorrect return type of ns2usecs()

Li Zefan (2):
      blktrace: small cleanup in blk_msg_write()
      blktrace: fix pdu_len when tracing packet command requests

Linus Torvalds (1):
      branch tracer, intel-iommu: fix build with CONFIG_BRANCH_TRACER=y

Nikanth Karthikesan (1):
      Update /debug/tracing/README

Steven Rostedt (2):
      tracing: remove CALLER_ADDR2 from wakeup tracer
      function-graph: add proper initialization for init task

Zhaolei (2):
      ftrace: Add check of sched_stopped for probe_sched_wakeup
      ftrace: Correct a text align for event format output


 arch/x86/kernel/ftrace.c            |    1 -
 block/blk-core.c                    |    1 +
 include/linux/compiler.h            |    7 +++++--
 include/linux/ftrace.h              |    8 ++++++--
 include/linux/init_task.h           |    2 ++
 kernel/trace/blktrace.c             |    7 ++++---
 kernel/trace/trace.c                |   21 ++++++++++++++++-----
 kernel/trace/trace.h                |    2 +-
 kernel/trace/trace_export.c         |    2 +-
 kernel/trace/trace_output.c         |    2 +-
 kernel/trace/trace_sched_switch.c   |    3 +++
 kernel/trace/trace_sched_wakeup.c   |    8 +++++++-
 scripts/{trace => tracing}/power.pl |    0
 13 files changed, 47 insertions(+), 17 deletions(-)
 rename scripts/{trace => tracing}/power.pl (100%)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 61df775..70a10ca 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -20,7 +20,6 @@
 
 #include <asm/cacheflush.h>
 #include <asm/ftrace.h>
-#include <linux/ftrace.h>
 #include <asm/nops.h>
 #include <asm/nmi.h>
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 29bcfac..859879d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -132,6 +132,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->cmd = rq->__cmd;
+	rq->cmd_len = BLK_MAX_CDB;
 	rq->tag = -1;
 	rq->ref_count = 1;
 }
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 6faa7e5..37bcb50 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -76,7 +76,8 @@ struct ftrace_branch_data {
  * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
  * to disable branch tracing on a per file basis.
  */
-#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING)
+#if defined(CONFIG_TRACE_BRANCH_PROFILING) \
+    && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__)
 void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 
 #define likely_notrace(x)	__builtin_expect(!!(x), 1)
@@ -114,7 +115,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
  * "Define 'is'", Bill Clinton
  * "Define 'if'", Steven Rostedt
  */
-#define if(cond) if (__builtin_constant_p((cond)) ? !!(cond) :		\
+#define if(cond, ...) __trace_if( (cond , ## __VA_ARGS__) )
+#define __trace_if(cond) \
+	if (__builtin_constant_p((cond)) ? !!(cond) :			\
 	({								\
 		int ______r;						\
 		static struct ftrace_branch_data			\
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 015a3d2..da5405d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -356,6 +356,9 @@ struct ftrace_graph_ret {
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
+/* for init task */
+#define INIT_FTRACE_GRAPH		.ret_stack = NULL
+
 /*
  * Stack of return addresses for functions
  * of a thread.
@@ -430,10 +433,11 @@ static inline void unpause_graph_tracing(void)
 {
 	atomic_dec(&current->tracing_graph_pause);
 }
-#else
+#else /* !CONFIG_FUNCTION_GRAPH_TRACER */
 
 #define __notrace_funcgraph
 #define __irq_entry
+#define INIT_FTRACE_GRAPH
 
 static inline void ftrace_graph_init_task(struct task_struct *t) { }
 static inline void ftrace_graph_exit_task(struct task_struct *t) { }
@@ -445,7 +449,7 @@ static inline int task_curr_ret_stack(struct task_struct *tsk)
 
 static inline void pause_graph_tracing(void) { }
 static inline void unpause_graph_tracing(void) { }
-#endif
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 #ifdef CONFIG_TRACING
 #include <linux/sched.h>
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index e752d97..cada054 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -5,6 +5,7 @@
 #include <linux/irqflags.h>
 #include <linux/utsname.h>
 #include <linux/lockdep.h>
+#include <linux/ftrace.h>
 #include <linux/ipc.h>
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
@@ -184,6 +185,7 @@ extern struct cred init_cred;
 	INIT_IDS							\
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
+	INIT_FTRACE_GRAPH						\
 }
 
 
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 947c5b3..b32ff44 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -327,10 +327,10 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
 	char *msg;
 	struct blk_trace *bt;
 
-	if (count > BLK_TN_MAX_MSG)
+	if (count >= BLK_TN_MAX_MSG)
 		return -EINVAL;
 
-	msg = kmalloc(count, GFP_KERNEL);
+	msg = kmalloc(count + 1, GFP_KERNEL);
 	if (msg == NULL)
 		return -ENOMEM;
 
@@ -339,6 +339,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
 		return -EFAULT;
 	}
 
+	msg[count] = '\0';
 	bt = filp->private_data;
 	__trace_note_message(bt, "%s", msg);
 	kfree(msg);
@@ -642,7 +643,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
 	if (blk_pc_request(rq)) {
 		what |= BLK_TC_ACT(BLK_TC_PC);
 		__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
-				sizeof(rq->cmd), rq->cmd);
+				rq->cmd_len, rq->cmd);
 	} else  {
 		what |= BLK_TC_ACT(BLK_TC_FS);
 		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a0174a4..9d28476 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -30,6 +30,7 @@
 #include <linux/percpu.h>
 #include <linux/splice.h>
 #include <linux/kdebug.h>
+#include <linux/string.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/poll.h>
@@ -147,8 +148,7 @@ static int __init set_ftrace_dump_on_oops(char *str)
 }
 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 
-long
-ns2usecs(cycle_t nsec)
+unsigned long long ns2usecs(cycle_t nsec)
 {
 	nsec += 500;
 	do_div(nsec, 1000);
@@ -1632,7 +1632,11 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
 		return;
 
 	cpumask_set_cpu(iter->cpu, iter->started);
-	trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
+
+	/* Don't print started cpu buffer for the first entry of the trace */
+	if (iter->idx > 1)
+		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
+				iter->cpu);
 }
 
 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
@@ -1867,6 +1871,11 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (current_trace)
 		*iter->trace = *current_trace;
 
+	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
+		goto fail;
+
+	cpumask_clear(iter->started);
+
 	if (current_trace && current_trace->print_max)
 		iter->tr = &max_tr;
 	else
@@ -1917,6 +1926,7 @@ __tracing_open(struct inode *inode, struct file *file)
 		if (iter->buffer_iter[cpu])
 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
 	}
+	free_cpumask_var(iter->started);
  fail:
 	mutex_unlock(&trace_types_lock);
 	kfree(iter->trace);
@@ -1960,6 +1970,7 @@ static int tracing_release(struct inode *inode, struct file *file)
 
 	seq_release(inode, file);
 	mutex_destroy(&iter->mutex);
+	free_cpumask_var(iter->started);
 	kfree(iter->trace);
 	kfree(iter);
 	return 0;
@@ -2358,9 +2369,9 @@ static const char readme_msg[] =
 	"# mkdir /debug\n"
 	"# mount -t debugfs nodev /debug\n\n"
 	"# cat /debug/tracing/available_tracers\n"
-	"wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
+	"wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
 	"# cat /debug/tracing/current_tracer\n"
-	"none\n"
+	"nop\n"
 	"# echo sched_switch > /debug/tracing/current_tracer\n"
 	"# cat /debug/tracing/current_tracer\n"
 	"sched_switch\n"
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cb0ce3f..0d81a4a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -596,7 +596,7 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
-extern long ns2usecs(cycle_t nsec);
+extern unsigned long long ns2usecs(cycle_t nsec);
 extern int
 trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
 extern int
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 4d9952d..07a22c3 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -40,7 +40,7 @@
 
 #undef TRACE_FIELD_ZERO_CHAR
 #define TRACE_FIELD_ZERO_CHAR(item)					\
-	ret = trace_seq_printf(s, "\tfield: char " #item ";\t"		\
+	ret = trace_seq_printf(s, "\tfield:char " #item ";\t"		\
 			       "offset:%u;\tsize:0;\n",			\
 			       (unsigned int)offsetof(typeof(field), item)); \
 	if (!ret)							\
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index d72b9a6..64b54a5 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -423,7 +423,7 @@ int trace_print_lat_context(struct trace_iterator *iter)
 
 		trace_find_cmdline(entry->pid, comm);
 
-		ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]"
+		ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]"
 				       " %ld.%03ldms (+%ld.%03ldms): ", comm,
 				       entry->pid, iter->cpu, entry->flags,
 				       entry->preempt_count, iter->idx,
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index de35f20..9117cea 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -62,6 +62,9 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
 	pc = preempt_count();
 	tracing_record_cmdline(current);
 
+	if (sched_stopped)
+		return;
+
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = ctx_trace->data[cpu];
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3c5ad6b..5bc00e8 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -154,7 +154,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 	if (unlikely(!tracer_enabled || next != wakeup_task))
 		goto out_unlock;
 
-	trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
+	trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
 	tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
 
 	/*
@@ -257,6 +257,12 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
 	data = wakeup_trace->data[wakeup_cpu];
 	data->preempt_timestamp = ftrace_now(cpu);
 	tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
+
+	/*
+	 * We must be careful in using CALLER_ADDR2. But since wake_up
+	 * is not called by an assembly function  (where as schedule is)
+	 * it should be safe to use it here.
+	 */
 	trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
 
 out_locked:
diff --git a/scripts/trace/power.pl b/scripts/tracing/power.pl
similarity index 100%
rename from scripts/trace/power.pl
rename to scripts/tracing/power.pl

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [git pull] tracing fixes
@ 2009-02-19 17:08 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-02-19 17:08 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Andrew Morton, Steven Rostedt

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Frederic Weisbecker (1):
      tracing/function-graph-tracer: trace the idle tasks

Steven Rostedt (3):
      tracing: disable tracing while testing ring buffer
      tracing: have function trace select kallsyms
      tracing: limit the number of loops the ring buffer self test can make


 kernel/trace/Kconfig          |    2 ++
 kernel/trace/ftrace.c         |    6 +++++-
 kernel/trace/trace_selftest.c |   19 +++++++++++++++++++
 3 files changed, 26 insertions(+), 1 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 58a93fb..34e707e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -52,6 +52,7 @@ config FUNCTION_TRACER
 	depends on HAVE_FUNCTION_TRACER
 	depends on DEBUG_KERNEL
 	select FRAME_POINTER
+	select KALLSYMS
 	select TRACING
 	select CONTEXT_SWITCH_TRACER
 	help
@@ -238,6 +239,7 @@ config STACK_TRACER
 	depends on DEBUG_KERNEL
 	select FUNCTION_TRACER
 	select STACKTRACE
+	select KALLSYMS
 	help
 	  This special tracer records the maximum stack footprint of the
 	  kernel and displays it in debugfs/tracing/stack_trace.
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9a236ff..fdf913d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2033,7 +2033,7 @@ free:
 static int start_graph_tracing(void)
 {
 	struct ftrace_ret_stack **ret_stack_list;
-	int ret;
+	int ret, cpu;
 
 	ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
 				sizeof(struct ftrace_ret_stack *),
@@ -2042,6 +2042,10 @@ static int start_graph_tracing(void)
 	if (!ret_stack_list)
 		return -ENOMEM;
 
+	/* The cpu_boot init_task->ret_stack will never be freed */
+	for_each_online_cpu(cpu)
+		ftrace_graph_init_task(idle_task(cpu));
+
 	do {
 		ret = alloc_retstack_tasklist(ret_stack_list);
 	} while (ret == -EAGAIN);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 88c8eb7..bc8e80a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -23,10 +23,20 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
 {
 	struct ring_buffer_event *event;
 	struct trace_entry *entry;
+	unsigned int loops = 0;
 
 	while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
 		entry = ring_buffer_event_data(event);
 
+		/*
+		 * The ring buffer is a size of trace_buf_size, if
+		 * we loop more than the size, there's something wrong
+		 * with the ring buffer.
+		 */
+		if (loops++ > trace_buf_size) {
+			printk(KERN_CONT ".. bad ring buffer ");
+			goto failed;
+		}
 		if (!trace_valid_entry(entry)) {
 			printk(KERN_CONT ".. invalid entry %d ",
 				entry->type);
@@ -57,11 +67,20 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
 
 	cnt = ring_buffer_entries(tr->buffer);
 
+	/*
+	 * The trace_test_buffer_cpu runs a while loop to consume all data.
+	 * If the calling tracer is broken, and is constantly filling
+	 * the buffer, this will run forever, and hard lock the box.
+	 * We disable the ring buffer while we do this test to prevent
+	 * a hard lock up.
+	 */
+	tracing_off();
 	for_each_possible_cpu(cpu) {
 		ret = trace_test_buffer_cpu(tr, cpu);
 		if (ret)
 			break;
 	}
+	tracing_on();
 	__raw_spin_unlock(&ftrace_max_lock);
 	local_irq_restore(flags);
 

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [git pull] tracing fixes
@ 2009-02-17 16:37 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-02-17 16:37 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Andrew Morton, Steven Rostedt

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Pekka Paalanen (3):
      mmiotrace: count events lost due to not recording
      trace: mmiotrace to the tracer menu in Kconfig
      doc: mmiotrace.txt, buffer size control change


 Documentation/tracers/mmiotrace.txt |    6 ++----
 arch/x86/Kconfig.debug              |   24 ++----------------------
 kernel/trace/Kconfig                |   23 +++++++++++++++++++++++
 kernel/trace/trace_mmiotrace.c      |   14 ++++++++++----
 4 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt
index cde23b4..5731c67 100644
--- a/Documentation/tracers/mmiotrace.txt
+++ b/Documentation/tracers/mmiotrace.txt
@@ -78,12 +78,10 @@ to view your kernel log and look for "mmiotrace has lost events" warning. If
 events were lost, the trace is incomplete. You should enlarge the buffers and
 try again. Buffers are enlarged by first seeing how large the current buffers
 are:
-$ cat /debug/tracing/trace_entries
+$ cat /debug/tracing/buffer_size_kb
 gives you a number. Approximately double this number and write it back, for
 instance:
-$ echo 0 > /debug/tracing/tracing_enabled
-$ echo 128000 > /debug/tracing/trace_entries
-$ echo 1 > /debug/tracing/tracing_enabled
+$ echo 128000 > /debug/tracing/buffer_size_kb
 Then start again from the top.
 
 If you are doing a trace for a driver project, e.g. Nouveau, you should also
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 10d6cc3..e1983fa 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -174,28 +174,8 @@ config IOMMU_LEAK
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
 
-config MMIOTRACE
-	bool "Memory mapped IO tracing"
-	depends on DEBUG_KERNEL && PCI
-	select TRACING
-	help
-	  Mmiotrace traces Memory Mapped I/O access and is meant for
-	  debugging and reverse engineering. It is called from the ioremap
-	  implementation and works via page faults. Tracing is disabled by
-	  default and can be enabled at run-time.
-
-	  See Documentation/tracers/mmiotrace.txt.
-	  If you are not helping to develop drivers, say N.
-
-config MMIOTRACE_TEST
-	tristate "Test module for mmiotrace"
-	depends on MMIOTRACE && m
-	help
-	  This is a dumb module for testing mmiotrace. It is very dangerous
-	  as it will write garbage to IO memory starting at a given address.
-	  However, it should be safe to use on e.g. unused portion of VRAM.
-
-	  Say N, unless you absolutely know what you are doing.
+config HAVE_MMIOTRACE_SUPPORT
+	def_bool y
 
 #
 # IO delay types:
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e2a4ff6..58a93fb 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -302,4 +302,27 @@ config FTRACE_STARTUP_TEST
 	  functioning properly. It will do tests on all the configured
 	  tracers of ftrace.
 
+config MMIOTRACE
+	bool "Memory mapped IO tracing"
+	depends on HAVE_MMIOTRACE_SUPPORT && DEBUG_KERNEL && PCI
+	select TRACING
+	help
+	  Mmiotrace traces Memory Mapped I/O access and is meant for
+	  debugging and reverse engineering. It is called from the ioremap
+	  implementation and works via page faults. Tracing is disabled by
+	  default and can be enabled at run-time.
+
+	  See Documentation/tracers/mmiotrace.txt.
+	  If you are not helping to develop drivers, say N.
+
+config MMIOTRACE_TEST
+	tristate "Test module for mmiotrace"
+	depends on MMIOTRACE && m
+	help
+	  This is a dumb module for testing mmiotrace. It is very dangerous
+	  as it will write garbage to IO memory starting at a given address.
+	  However, it should be safe to use on e.g. unused portion of VRAM.
+
+	  Say N, unless you absolutely know what you are doing.
+
 endmenu
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index fffcb06..80e503e 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/mmiotrace.h>
 #include <linux/pci.h>
+#include <asm/atomic.h>
 
 #include "trace.h"
 
@@ -19,6 +20,7 @@ struct header_iter {
 static struct trace_array *mmio_trace_array;
 static bool overrun_detected;
 static unsigned long prev_overruns;
+static atomic_t dropped_count;
 
 static void mmio_reset_data(struct trace_array *tr)
 {
@@ -121,11 +123,11 @@ static void mmio_close(struct trace_iterator *iter)
 
 static unsigned long count_overruns(struct trace_iterator *iter)
 {
-	unsigned long cnt = 0;
+	unsigned long cnt = atomic_xchg(&dropped_count, 0);
 	unsigned long over = ring_buffer_overruns(iter->tr->buffer);
 
 	if (over > prev_overruns)
-		cnt = over - prev_overruns;
+		cnt += over - prev_overruns;
 	prev_overruns = over;
 	return cnt;
 }
@@ -310,8 +312,10 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
 
 	event	= ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
 					   &irq_flags);
-	if (!event)
+	if (!event) {
+		atomic_inc(&dropped_count);
 		return;
+	}
 	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(&entry->ent, 0, preempt_count());
 	entry->ent.type			= TRACE_MMIO_RW;
@@ -338,8 +342,10 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
 
 	event	= ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
 					   &irq_flags);
-	if (!event)
+	if (!event) {
+		atomic_inc(&dropped_count);
 		return;
+	}
 	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(&entry->ent, 0, preempt_count());
 	entry->ent.type			= TRACE_MMIO_MAP;

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [git pull] tracing fixes
@ 2009-02-11 14:25 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-02-11 14:25 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Andrew Morton, Steven Rostedt

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Hugh Dickins (1):
      profiling: fix broken profiling regression

Steven Rostedt (2):
      tracing, x86: fix fixup section to return to original code
      tracing, x86: fix constraint for parent variable


 arch/x86/kernel/ftrace.c |   17 +++++++++--------
 kernel/profile.c         |    3 +++
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1b43086..231bdd3 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -488,20 +488,21 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 	 * ignore such a protection.
 	 */
 	asm volatile(
-		"1: " _ASM_MOV " (%[parent_old]), %[old]\n"
-		"2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n"
+		"1: " _ASM_MOV " (%[parent]), %[old]\n"
+		"2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
 		"   movl $0, %[faulted]\n"
+		"3:\n"
 
 		".section .fixup, \"ax\"\n"
-		"3: movl $1, %[faulted]\n"
+		"4: movl $1, %[faulted]\n"
+		"   jmp 3b\n"
 		".previous\n"
 
-		_ASM_EXTABLE(1b, 3b)
-		_ASM_EXTABLE(2b, 3b)
+		_ASM_EXTABLE(1b, 4b)
+		_ASM_EXTABLE(2b, 4b)
 
-		: [parent_replaced] "=r" (parent), [old] "=r" (old),
-		  [faulted] "=r" (faulted)
-		: [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
+		: [old] "=r" (old), [faulted] "=r" (faulted)
+		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
 		: "memory"
 	);
 
diff --git a/kernel/profile.c b/kernel/profile.c
index 784933a..7724e04 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -114,12 +114,15 @@ int __ref profile_init(void)
 	if (!slab_is_available()) {
 		prof_buffer = alloc_bootmem(buffer_bytes);
 		alloc_bootmem_cpumask_var(&prof_cpu_mask);
+		cpumask_copy(prof_cpu_mask, cpu_possible_mask);
 		return 0;
 	}
 
 	if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
 		return -ENOMEM;
 
+	cpumask_copy(prof_cpu_mask, cpu_possible_mask);
+
 	prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL);
 	if (prof_buffer)
 		return 0;

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [git pull] tracing fixes
@ 2009-02-04 19:08 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-02-04 19:08 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Andrew Morton, Steven Rostedt

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Oleg Nesterov (1):
      ftrace: do_each_pid_task() needs rcu lock


 kernel/trace/ftrace.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7dcf6e9..9a236ff 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1737,9 +1737,12 @@ static void clear_ftrace_pid(struct pid *pid)
 {
 	struct task_struct *p;
 
+	rcu_read_lock();
 	do_each_pid_task(pid, PIDTYPE_PID, p) {
 		clear_tsk_trace_trace(p);
 	} while_each_pid_task(pid, PIDTYPE_PID, p);
+	rcu_read_unlock();
+
 	put_pid(pid);
 }
 
@@ -1747,9 +1750,11 @@ static void set_ftrace_pid(struct pid *pid)
 {
 	struct task_struct *p;
 
+	rcu_read_lock();
 	do_each_pid_task(pid, PIDTYPE_PID, p) {
 		set_tsk_trace_trace(p);
 	} while_each_pid_task(pid, PIDTYPE_PID, p);
+	rcu_read_unlock();
 }
 
 static void clear_ftrace_pid_task(struct pid **pid)

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [git pull] tracing fixes
@ 2009-01-30 23:02 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-01-30 23:02 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Andrew Morton, Steven Rostedt, Thomas Gleixner

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

these are cherry-picked fixes from the tracing tree.

 Thanks,

	Ingo

------------------>
Frederic Weisbecker (1):
      tracing/function-graph-tracer: fix a regression while suspend to disk

Lai Jiangshan (1):
      ring_buffer: reset write when reserve buffer fail

Markus Metzger (1):
      x86, ds, bts: cleanup/fix DS configuration

Steven Rostedt (5):
      ring-buffer: fix alignment problem
      trace: print ftrace_dump at KERN_EMERG log level
      trace: stop all recording to ring buffer on ftrace_dump
      trace: set max latency variable to zero on default
      ring-buffer: reset timestamps when ring buffer is reset


 arch/x86/kernel/ds.c              |   31 +++++++++++++++++--------------
 kernel/trace/ftrace.c             |   27 +++++++++++++++++++++++++++
 kernel/trace/ring_buffer.c        |   15 +++++++++------
 kernel/trace/trace.c              |    5 +++--
 kernel/trace/trace_irqsoff.c      |    1 +
 kernel/trace/trace_sched_wakeup.c |    1 +
 6 files changed, 58 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index da91701..169a120 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -15,8 +15,8 @@
  * - buffer allocation (memory accounting)
  *
  *
- * Copyright (C) 2007-2008 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
+ * Copyright (C) 2007-2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
  */
 
 
@@ -890,7 +890,7 @@ int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
 }
 
 static const struct ds_configuration ds_cfg_netburst = {
-	.name = "netburst",
+	.name = "Netburst",
 	.ctl[dsf_bts]		= (1 << 2) | (1 << 3),
 	.ctl[dsf_bts_kernel]	= (1 << 5),
 	.ctl[dsf_bts_user]	= (1 << 6),
@@ -904,7 +904,7 @@ static const struct ds_configuration ds_cfg_netburst = {
 #endif
 };
 static const struct ds_configuration ds_cfg_pentium_m = {
-	.name = "pentium m",
+	.name = "Pentium M",
 	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
 
 	.sizeof_field		= sizeof(long),
@@ -915,8 +915,8 @@ static const struct ds_configuration ds_cfg_pentium_m = {
 	.sizeof_rec[ds_pebs]	= sizeof(long) * 18,
 #endif
 };
-static const struct ds_configuration ds_cfg_core2 = {
-	.name = "core 2",
+static const struct ds_configuration ds_cfg_core2_atom = {
+	.name = "Core 2/Atom",
 	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
 	.ctl[dsf_bts_kernel]	= (1 << 9),
 	.ctl[dsf_bts_user]	= (1 << 10),
@@ -949,19 +949,22 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 	switch (c->x86) {
 	case 0x6:
 		switch (c->x86_model) {
-		case 0 ... 0xC:
-			/* sorry, don't know about them */
-			break;
-		case 0xD:
-		case 0xE: /* Pentium M */
+		case 0x9:
+		case 0xd: /* Pentium M */
 			ds_configure(&ds_cfg_pentium_m);
 			break;
-		default: /* Core2, Atom, ... */
-			ds_configure(&ds_cfg_core2);
+		case 0xf:
+		case 0x17: /* Core2 */
+		case 0x1c: /* Atom */
+			ds_configure(&ds_cfg_core2_atom);
+			break;
+		case 0x1a: /* i7 */
+		default:
+			/* sorry, don't know about them */
 			break;
 		}
 		break;
-	case 0xF:
+	case 0xf:
 		switch (c->x86_model) {
 		case 0x0:
 		case 0x1:
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2f32969..7dcf6e9 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -17,6 +17,7 @@
 #include <linux/clocksource.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
+#include <linux/suspend.h>
 #include <linux/debugfs.h>
 #include <linux/hardirq.h>
 #include <linux/kthread.h>
@@ -1965,6 +1966,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
 static atomic_t ftrace_graph_active;
+static struct notifier_block ftrace_suspend_notifier;
 
 int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
 {
@@ -2043,6 +2045,27 @@ static int start_graph_tracing(void)
 	return ret;
 }
 
+/*
+ * Hibernation protection.
+ * The state of the current task is too much unstable during
+ * suspend/restore to disk. We want to protect against that.
+ */
+static int
+ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
+							void *unused)
+{
+	switch (state) {
+	case PM_HIBERNATION_PREPARE:
+		pause_graph_tracing();
+		break;
+
+	case PM_POST_HIBERNATION:
+		unpause_graph_tracing();
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
 int register_ftrace_graph(trace_func_graph_ret_t retfunc,
 			trace_func_graph_ent_t entryfunc)
 {
@@ -2050,6 +2073,9 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
 
 	mutex_lock(&ftrace_sysctl_lock);
 
+	ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
+	register_pm_notifier(&ftrace_suspend_notifier);
+
 	atomic_inc(&ftrace_graph_active);
 	ret = start_graph_tracing();
 	if (ret) {
@@ -2075,6 +2101,7 @@ void unregister_ftrace_graph(void)
 	ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
 	ftrace_graph_entry = ftrace_graph_entry_stub;
 	ftrace_shutdown(FTRACE_STOP_FUNC_RET);
+	unregister_pm_notifier(&ftrace_suspend_notifier);
 
 	mutex_unlock(&ftrace_sysctl_lock);
 }
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8b0daf0..bd38c5c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -246,7 +246,7 @@ static inline int test_time_stamp(u64 delta)
 	return 0;
 }
 
-#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
+#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data))
 
 /*
  * head_page == tail_page && head == tail then buffer is empty.
@@ -1025,12 +1025,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		}
 
 		if (next_page == head_page) {
-			if (!(buffer->flags & RB_FL_OVERWRITE)) {
-				/* reset write */
-				if (tail <= BUF_PAGE_SIZE)
-					local_set(&tail_page->write, tail);
+			if (!(buffer->flags & RB_FL_OVERWRITE))
 				goto out_unlock;
-			}
 
 			/* tail_page has not moved yet? */
 			if (tail_page == cpu_buffer->tail_page) {
@@ -1105,6 +1101,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	return event;
 
  out_unlock:
+	/* reset write */
+	if (tail <= BUF_PAGE_SIZE)
+		local_set(&tail_page->write, tail);
+
 	__raw_spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);
 	return NULL;
@@ -2174,6 +2174,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 
 	cpu_buffer->overrun = 0;
 	cpu_buffer->entries = 0;
+
+	cpu_buffer->write_stamp = 0;
+	cpu_buffer->read_stamp = 0;
 }
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c580233..17bb88d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -40,7 +40,7 @@
 
 #define TRACE_BUFFER_FLAGS	(RB_FL_OVERWRITE)
 
-unsigned long __read_mostly	tracing_max_latency = (cycle_t)ULONG_MAX;
+unsigned long __read_mostly	tracing_max_latency;
 unsigned long __read_mostly	tracing_thresh;
 
 /*
@@ -3736,7 +3736,7 @@ static struct notifier_block trace_die_notifier = {
  * it if we decide to change what log level the ftrace dump
  * should be at.
  */
-#define KERN_TRACE		KERN_INFO
+#define KERN_TRACE		KERN_EMERG
 
 static void
 trace_printk_seq(struct trace_seq *s)
@@ -3770,6 +3770,7 @@ void ftrace_dump(void)
 	dump_ran = 1;
 
 	/* No turning back! */
+	tracing_off();
 	ftrace_kill();
 
 	for_each_tracing_cpu(cpu) {
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 7c2e326..62a78d9 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -380,6 +380,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr)
 
 static void __irqsoff_tracer_init(struct trace_array *tr)
 {
+	tracing_max_latency = 0;
 	irqsoff_trace = tr;
 	/* make sure that the tracer is visible */
 	smp_wmb();
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 43586b6..42ae1e7 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -333,6 +333,7 @@ static void stop_wakeup_tracer(struct trace_array *tr)
 
 static int wakeup_tracer_init(struct trace_array *tr)
 {
+	tracing_max_latency = 0;
 	wakeup_trace = tr;
 	start_wakeup_tracer(tr);
 	return 0;

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [git pull] tracing fixes
@ 2009-01-11 14:47 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2009-01-11 14:47 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Andrew Morton, Steven Rostedt, Thomas Gleixner

Linus,

Please pull the latest tracing updates+fixes git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

( Note: this tree also includes the bits sent earlier in the merge window 
  but not applied yet. )

 Thanks,

	Ingo

------------------>
Andrew Morton (2):
      kernel/trace/ring_buffer.c: reduce inlining
      kernel/trace/ring_buffer.c: use DIV_ROUND_UP

Eduard - Gabriel Munteanu (10):
      SLUB: Replace __builtin_return_address(0) with _RET_IP_.
      kmemtrace: Core implementation.
      kmemtrace: Additional documentation.
      kmemtrace: SLAB hooks.
      kmemtrace: SLOB hooks.
      kmemtrace: SLUB hooks.
      kmemtrace: Fix typos in documentation.
      kmemtrace: Better alternative to "kmemtrace: fix printk format warnings".
      kmemtrace: SLUB hooks for caller-tracking functions.
      kmemtrace: Remove the relay version of kmemtrace

Frederic Weisbecker (6):
      tracing/function-graph-tracer: strip ending newlines on comments
      tracing/ftrace: provide the base infrastructure for histogram tracing
      tracing/branch-tracer: adapt to the stat tracing API
      tracing/kmemtrace: normalize the raw tracer event to the unified tracing API
      tracing/ftrace: fix a memory leak in stat tracing
      tracing/ftrace: handle more than one stat file per tracer

Ingo Molnar (8):
      tracing/ftrace: make trace_find_cmdline() generally available
      tracing/selftest: remove TRACE_CONT reference
      kmemtrace: move #include lines
      relayfs: replace BUG() with WARN_ON() in relay_late_setup_files()
      tracing, kvm: change MARKERS to select instead of depends on
      tracing/kmemtrace: export kmemtrace_mark_alloc_node() / kmemtrace_mark_free()
      tracing/kmemtrace: fix typo
      kmemtrace: add kmemtrace_init()

Pekka Enberg (3):
      kmemtrace: remove unnecessary casts
      kmemtrace: allow kmemtrace to be enabled after boot
      kmemtrace: remove config option for enabling tracing at boot

Pekka Paalanen (3):
      doc: mmiotrace.txt, buffer size control change
      trace: mmiotrace to the tracer menu in Kconfig
      mmiotrace: count events lost due to not recording

Randy Dunlap (1):
      sysrq: fix ftrace help msg & doc.

Steven Rostedt (5):
      ftrace: remove obsolete print continue functionality
      ftrace: set up trace event hash infrastructure
      ftrace: change trace.c to use registered events
      ftrace: convert unsigned index to signed
      trace: clean up funny line breaks in stat_seq_show

Vegard Nossum (1):
      kmemtrace: add missing newline


 Documentation/ABI/testing/debugfs-kmemtrace |   71 +++
 Documentation/kernel-parameters.txt         |   10 +
 Documentation/sysrq.txt                     |    2 +
 Documentation/tracers/mmiotrace.txt         |    6 +-
 Documentation/vm/kmemtrace.txt              |  126 ++++
 MAINTAINERS                                 |    6 +
 arch/x86/Kconfig.debug                      |   24 +-
 arch/x86/kvm/Kconfig                        |    3 +-
 drivers/char/sysrq.c                        |    2 +-
 include/linux/slab_def.h                    |   68 ++-
 include/linux/slob_def.h                    |    9 +-
 include/linux/slub_def.h                    |   53 ++-
 include/trace/kmemtrace.h                   |   75 +++
 init/main.c                                 |    2 +
 kernel/relay.c                              |    4 +-
 kernel/trace/Kconfig                        |   44 ++
 kernel/trace/Makefile                       |    3 +
 kernel/trace/ftrace.c                       |    4 +-
 kernel/trace/kmemtrace.c                    |  350 +++++++++++
 kernel/trace/ring_buffer.c                  |   40 +-
 kernel/trace/trace.c                        |  741 ++----------------------
 kernel/trace/trace.h                        |   66 ++-
 kernel/trace/trace_boot.c                   |    1 +
 kernel/trace/trace_branch.c                 |  284 ++++++----
 kernel/trace/trace_functions_graph.c        |    8 +-
 kernel/trace/trace_hw_branches.c            |    1 +
 kernel/trace/trace_mmiotrace.c              |   18 +-
 kernel/trace/trace_output.c                 |  832 +++++++++++++++++++++++++++
 kernel/trace/trace_output.h                 |   59 ++
 kernel/trace/trace_power.c                  |    1 +
 kernel/trace/trace_selftest.c               |    1 -
 kernel/trace/trace_stat.c                   |  351 +++++++++++
 mm/slab.c                                   |   71 ++-
 mm/slob.c                                   |   37 +-
 mm/slub.c                                   |   83 +++-
 35 files changed, 2522 insertions(+), 934 deletions(-)
 create mode 100644 Documentation/ABI/testing/debugfs-kmemtrace
 create mode 100644 Documentation/vm/kmemtrace.txt
 create mode 100644 include/trace/kmemtrace.h
 create mode 100644 kernel/trace/kmemtrace.c
 create mode 100644 kernel/trace/trace_output.c
 create mode 100644 kernel/trace/trace_output.h
 create mode 100644 kernel/trace/trace_stat.c

diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
new file mode 100644
index 0000000..5e6a92a
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-kmemtrace
@@ -0,0 +1,71 @@
+What:		/sys/kernel/debug/kmemtrace/
+Date:		July 2008
+Contact:	Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
+Description:
+
+In kmemtrace-enabled kernels, the following files are created:
+
+/sys/kernel/debug/kmemtrace/
+	cpu<n>		(0400)	Per-CPU tracing data, see below. (binary)
+	total_overruns	(0400)	Total number of bytes which were dropped from
+				cpu<n> files because of full buffer condition,
+				non-binary. (text)
+	abi_version	(0400)	Kernel's kmemtrace ABI version. (text)
+
+Each per-CPU file should be read according to the relay interface. That is,
+the reader should set affinity to that specific CPU and, as currently done by
+the userspace application (though there are other methods), use poll() with
+an infinite timeout before every read(). Otherwise, erroneous data may be
+read. The binary data has the following _core_ format:
+
+	Event ID	(1 byte)	Unsigned integer, one of:
+		0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
+		1 - represents a freeing of previously allocated memory
+		    (KMEMTRACE_EVENT_FREE)
+	Type ID		(1 byte)	Unsigned integer, one of:
+		0 - this is a kmalloc() / kfree()
+		1 - this is a kmem_cache_alloc() / kmem_cache_free()
+		2 - this is a __get_free_pages() et al.
+	Event size	(2 bytes)	Unsigned integer representing the
+					size of this event. Used to extend
+					kmemtrace. Discard the bytes you
+					don't know about.
+	Sequence number	(4 bytes)	Signed integer used to reorder data
+					logged on SMP machines. Wraparound
+					must be taken into account, although
+					it is unlikely.
+	Caller address	(8 bytes)	Return address to the caller.
+	Pointer to mem	(8 bytes)	Pointer to target memory area. Can be
+					NULL, but not all such calls might be
+					recorded.
+
+In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
+
+	Requested bytes	(8 bytes)	Total number of requested bytes,
+					unsigned, must not be zero.
+	Allocated bytes (8 bytes)	Total number of actually allocated
+					bytes, unsigned, must not be lower
+					than requested bytes.
+	Requested flags	(4 bytes)	GFP flags supplied by the caller.
+	Target CPU	(4 bytes)	Signed integer, valid for event id 1.
+					If equal to -1, target CPU is the same
+					as origin CPU, but the reverse might
+					not be true.
+
+The data is made available in the same endianness the machine has.
+
+Other event ids and type ids may be defined and added. Other fields may be
+added by increasing event size, but see below for details.
+Every modification to the ABI, including new id definitions, are followed
+by bumping the ABI version by one.
+
+Adding new data to the packet (features) is done at the end of the mandatory
+data:
+	Feature size	(2 byte)
+	Feature ID	(1 byte)
+	Feature data	(Feature size - 3 bytes)
+
+
+Users:
+	kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
+
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 8511d35..ac613a6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -49,6 +49,7 @@ parameter is applicable:
 	ISAPNP	ISA PnP code is enabled.
 	ISDN	Appropriate ISDN support is enabled.
 	JOY	Appropriate joystick support is enabled.
+	KMEMTRACE kmemtrace is enabled.
 	LIBATA  Libata driver is enabled
 	LP	Printer support is enabled.
 	LOOP	Loopback device support is enabled.
@@ -1050,6 +1051,15 @@ and is between 256 and 4096 characters. It is defined in the file
 			use the HighMem zone if it exists, and the Normal
 			zone if it does not.
 
+	kmemtrace.enable=	[KNL,KMEMTRACE] Format: { yes | no }
+				Controls whether kmemtrace is enabled
+				at boot-time.
+
+	kmemtrace.subbufs=n	[KNL,KMEMTRACE] Overrides the number of
+			subbufs kmemtrace's relay channel has. Set this
+			higher than default (KMEMTRACE_N_SUBBUFS in code) if
+			you experience buffer overruns.
+
 	movablecore=nn[KMG]	[KNL,X86-32,IA-64,PPC,X86-64] This parameter
 			is similar to kernelcore except it specifies the
 			amount of memory used for migratable allocations.
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 10a0263..56b53e0 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -114,6 +114,8 @@ On all -  write a character to /proc/sysrq-trigger.  e.g.:
 
 'x'	- Used by xmon interface on ppc/powerpc platforms.
 
+'z'	- Dump the ftrace buffer
+
 '0'-'9' - Sets the console log level, controlling which kernel messages
           will be printed to your console. ('0', for example would make
           it so that only emergency messages like PANICs or OOPSes would
diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt
index cde23b4..5731c67 100644
--- a/Documentation/tracers/mmiotrace.txt
+++ b/Documentation/tracers/mmiotrace.txt
@@ -78,12 +78,10 @@ to view your kernel log and look for "mmiotrace has lost events" warning. If
 events were lost, the trace is incomplete. You should enlarge the buffers and
 try again. Buffers are enlarged by first seeing how large the current buffers
 are:
-$ cat /debug/tracing/trace_entries
+$ cat /debug/tracing/buffer_size_kb
 gives you a number. Approximately double this number and write it back, for
 instance:
-$ echo 0 > /debug/tracing/tracing_enabled
-$ echo 128000 > /debug/tracing/trace_entries
-$ echo 1 > /debug/tracing/tracing_enabled
+$ echo 128000 > /debug/tracing/buffer_size_kb
 Then start again from the top.
 
 If you are doing a trace for a driver project, e.g. Nouveau, you should also
diff --git a/Documentation/vm/kmemtrace.txt b/Documentation/vm/kmemtrace.txt
new file mode 100644
index 0000000..a956d9b
--- /dev/null
+++ b/Documentation/vm/kmemtrace.txt
@@ -0,0 +1,126 @@
+			kmemtrace - Kernel Memory Tracer
+
+			  by Eduard - Gabriel Munteanu
+			     <eduard.munteanu@linux360.ro>
+
+I. Introduction
+===============
+
+kmemtrace helps kernel developers figure out two things:
+1) how different allocators (SLAB, SLUB etc.) perform
+2) how kernel code allocates memory and how much
+
+To do this, we trace every allocation and export information to the userspace
+through the relay interface. We export things such as the number of requested
+bytes, the number of bytes actually allocated (i.e. including internal
+fragmentation), whether this is a slab allocation or a plain kmalloc() and so
+on.
+
+The actual analysis is performed by a userspace tool (see section III for
+details on where to get it from). It logs the data exported by the kernel,
+processes it and (as of writing this) can provide the following information:
+- the total amount of memory allocated and fragmentation per call-site
+- the amount of memory allocated and fragmentation per allocation
+- total memory allocated and fragmentation in the collected dataset
+- number of cross-CPU allocation and frees (makes sense in NUMA environments)
+
+Moreover, it can potentially find inconsistent and erroneous behavior in
+kernel code, such as using slab free functions on kmalloc'ed memory or
+allocating less memory than requested (but not truly failed allocations).
+
+kmemtrace also makes provisions for tracing on some arch and analysing the
+data on another.
+
+II. Design and goals
+====================
+
+kmemtrace was designed to handle rather large amounts of data. Thus, it uses
+the relay interface to export whatever is logged to userspace, which then
+stores it. Analysis and reporting is done asynchronously, that is, after the
+data is collected and stored. By design, it allows one to log and analyse
+on different machines and different arches.
+
+As of writing this, the ABI is not considered stable, though it might not
+change much. However, no guarantees are made about compatibility yet. When
+deemed stable, the ABI should still allow easy extension while maintaining
+backward compatibility. This is described further in Documentation/ABI.
+
+Summary of design goals:
+	- allow logging and analysis to be done across different machines
+	- be fast and anticipate usage in high-load environments (*)
+	- be reasonably extensible
+	- make it possible for GNU/Linux distributions to have kmemtrace
+	included in their repositories
+
+(*) - one of the reasons Pekka Enberg's original userspace data analysis
+    tool's code was rewritten from Perl to C (although this is more than a
+    simple conversion)
+
+
+III. Quick usage guide
+======================
+
+1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
+CONFIG_KMEMTRACE).
+
+2) Get the userspace tool and build it:
+$ git-clone git://repo.or.cz/kmemtrace-user.git		# current repository
+$ cd kmemtrace-user/
+$ ./autogen.sh
+$ ./configure
+$ make
+
+3) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
+'single' runlevel (so that relay buffers don't fill up easily), and run
+kmemtrace:
+# '$' does not mean user, but root here.
+$ mount -t debugfs none /sys/kernel/debug
+$ mount -t proc none /proc
+$ cd path/to/kmemtrace-user/
+$ ./kmemtraced
+Wait a bit, then stop it with CTRL+C.
+$ cat /sys/kernel/debug/kmemtrace/total_overruns	# Check if we didn't
+							# overrun, should
+							# be zero.
+$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
+		check its correctness]
+$ ./kmemtrace-report
+
+Now you should have a nice and short summary of how the allocator performs.
+
+IV. FAQ and known issues
+========================
+
+Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
+this? Should I worry?
+A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
+large the number is. You can fix it by supplying a higher
+'kmemtrace.subbufs=N' kernel parameter.
+---
+
+Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
+A: This is a bug and should be reported. It can occur for a variety of
+reasons:
+	- possible bugs in relay code
+	- possible misuse of relay by kmemtrace
+	- timestamps being collected unorderly
+Or you may fix it yourself and send us a patch.
+---
+
+Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
+A: This is a known issue and I'm working on it. These might be true errors
+in kernel code, which may have inconsistent behavior (e.g. allocating memory
+with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
+out this behavior may work with SLAB, but may fail with other allocators.
+
+It may also be due to lack of tracing in some unusual allocator functions.
+
+We don't want bug reports regarding this issue yet.
+---
+
+V. See also
+===========
+
+Documentation/kernel-parameters.txt
+Documentation/ABI/testing/debugfs-kmemtrace
+
diff --git a/MAINTAINERS b/MAINTAINERS
index 6f65a26..9b75022 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2590,6 +2590,12 @@ M:	jason.wessel@windriver.com
 L:	kgdb-bugreport@lists.sourceforge.net
 S:	Maintained
 
+KMEMTRACE
+P:	Eduard - Gabriel Munteanu
+M:	eduard.munteanu@linux360.ro
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+
 KPROBES
 P:	Ananth N Mavinakayanahalli
 M:	ananth@in.ibm.com
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 10d6cc3..e1983fa 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -174,28 +174,8 @@ config IOMMU_LEAK
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
 
-config MMIOTRACE
-	bool "Memory mapped IO tracing"
-	depends on DEBUG_KERNEL && PCI
-	select TRACING
-	help
-	  Mmiotrace traces Memory Mapped I/O access and is meant for
-	  debugging and reverse engineering. It is called from the ioremap
-	  implementation and works via page faults. Tracing is disabled by
-	  default and can be enabled at run-time.
-
-	  See Documentation/tracers/mmiotrace.txt.
-	  If you are not helping to develop drivers, say N.
-
-config MMIOTRACE_TEST
-	tristate "Test module for mmiotrace"
-	depends on MMIOTRACE && m
-	help
-	  This is a dumb module for testing mmiotrace. It is very dangerous
-	  as it will write garbage to IO memory starting at a given address.
-	  However, it should be safe to use on e.g. unused portion of VRAM.
-
-	  Say N, unless you absolutely know what you are doing.
+config HAVE_MMIOTRACE_SUPPORT
+	def_bool y
 
 #
 # IO delay types:
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b81125f..c7da368 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -55,7 +55,8 @@ config KVM_AMD
 
 config KVM_TRACE
 	bool "KVM trace support"
-	depends on KVM && MARKERS && SYSFS
+	depends on KVM && SYSFS
+	select MARKERS
 	select RELAY
 	select DEBUG_FS
 	default n
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index d41b9f6..985dc8f 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -283,7 +283,7 @@ static void sysrq_ftrace_dump(int key, struct tty_struct *tty)
 }
 static struct sysrq_key_op sysrq_ftrace_dump_op = {
 	.handler	= sysrq_ftrace_dump,
-	.help_msg	= "dumpZ-ftrace-buffer",
+	.help_msg	= "dump-ftrace-buffer(Z)",
 	.action_msg	= "Dump ftrace buffer",
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
 };
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 39c3a5e..455f9af 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,6 +14,7 @@
 #include <asm/page.h>		/* kmalloc_sizes.h needs PAGE_SIZE */
 #include <asm/cache.h>		/* kmalloc_sizes.h needs L1_CACHE_BYTES */
 #include <linux/compiler.h>
+#include <trace/kmemtrace.h>
 
 /* Size description struct for general caches. */
 struct cache_sizes {
@@ -28,8 +29,26 @@ extern struct cache_sizes malloc_sizes[];
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);
 
-static inline void *kmalloc(size_t size, gfp_t flags)
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags);
+extern size_t slab_buffer_size(struct kmem_cache *cachep);
+#else
+static __always_inline void *
+kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
 {
+	return kmem_cache_alloc(cachep, flags);
+}
+static inline size_t slab_buffer_size(struct kmem_cache *cachep)
+{
+	return 0;
+}
+#endif
+
+static __always_inline void *kmalloc(size_t size, gfp_t flags)
+{
+	struct kmem_cache *cachep;
+	void *ret;
+
 	if (__builtin_constant_p(size)) {
 		int i = 0;
 
@@ -50,10 +69,17 @@ static inline void *kmalloc(size_t size, gfp_t flags)
 found:
 #ifdef CONFIG_ZONE_DMA
 		if (flags & GFP_DMA)
-			return kmem_cache_alloc(malloc_sizes[i].cs_dmacachep,
-						flags);
+			cachep = malloc_sizes[i].cs_dmacachep;
+		else
 #endif
-		return kmem_cache_alloc(malloc_sizes[i].cs_cachep, flags);
+			cachep = malloc_sizes[i].cs_cachep;
+
+		ret = kmem_cache_alloc_notrace(cachep, flags);
+
+		kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
+				     size, slab_buffer_size(cachep), flags);
+
+		return ret;
 	}
 	return __kmalloc(size, flags);
 }
@@ -62,8 +88,25 @@ found:
 extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
 extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
-static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
+					   gfp_t flags,
+					   int nodeid);
+#else
+static __always_inline void *
+kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
+			      gfp_t flags,
+			      int nodeid)
+{
+	return kmem_cache_alloc_node(cachep, flags, nodeid);
+}
+#endif
+
+static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
+	struct kmem_cache *cachep;
+	void *ret;
+
 	if (__builtin_constant_p(size)) {
 		int i = 0;
 
@@ -84,11 +127,18 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 found:
 #ifdef CONFIG_ZONE_DMA
 		if (flags & GFP_DMA)
-			return kmem_cache_alloc_node(malloc_sizes[i].cs_dmacachep,
-						flags, node);
+			cachep = malloc_sizes[i].cs_dmacachep;
+		else
 #endif
-		return kmem_cache_alloc_node(malloc_sizes[i].cs_cachep,
-						flags, node);
+			cachep = malloc_sizes[i].cs_cachep;
+
+		ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
+
+		kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_,
+					  ret, size, slab_buffer_size(cachep),
+					  flags, node);
+
+		return ret;
 	}
 	return __kmalloc_node(size, flags, node);
 }
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index 59a3fa4..0ec00b3 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -3,14 +3,15 @@
 
 void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
-static inline void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
+static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
+					      gfp_t flags)
 {
 	return kmem_cache_alloc_node(cachep, flags, -1);
 }
 
 void *__kmalloc_node(size_t size, gfp_t flags, int node);
 
-static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	return __kmalloc_node(size, flags, node);
 }
@@ -23,12 +24,12 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
  * kmalloc is the normal method of allocating memory
  * in the kernel.
  */
-static inline void *kmalloc(size_t size, gfp_t flags)
+static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
 	return __kmalloc_node(size, flags, -1);
 }
 
-static inline void *__kmalloc(size_t size, gfp_t flags)
+static __always_inline void *__kmalloc(size_t size, gfp_t flags)
 {
 	return kmalloc(size, flags);
 }
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 2f5c16b..6b657f7 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,6 +10,7 @@
 #include <linux/gfp.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
+#include <trace/kmemtrace.h>
 
 enum stat_item {
 	ALLOC_FASTPATH,		/* Allocation from cpu slab */
@@ -204,13 +205,31 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);
 
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags);
+#else
+static __always_inline void *
+kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+{
+	return kmem_cache_alloc(s, gfpflags);
+}
+#endif
+
 static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
 {
-	return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size));
+	unsigned int order = get_order(size);
+	void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
+
+	kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
+			     size, PAGE_SIZE << order, flags);
+
+	return ret;
 }
 
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
+	void *ret;
+
 	if (__builtin_constant_p(size)) {
 		if (size > PAGE_SIZE)
 			return kmalloc_large(size, flags);
@@ -221,7 +240,13 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
 			if (!s)
 				return ZERO_SIZE_PTR;
 
-			return kmem_cache_alloc(s, flags);
+			ret = kmem_cache_alloc_notrace(s, flags);
+
+			kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
+					     _THIS_IP_, ret,
+					     size, s->size, flags);
+
+			return ret;
 		}
 	}
 	return __kmalloc(size, flags);
@@ -231,8 +256,24 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
 void *__kmalloc_node(size_t size, gfp_t flags, int node);
 void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+					   gfp_t gfpflags,
+					   int node);
+#else
+static __always_inline void *
+kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+			      gfp_t gfpflags,
+			      int node)
+{
+	return kmem_cache_alloc_node(s, gfpflags, node);
+}
+#endif
+
 static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
+	void *ret;
+
 	if (__builtin_constant_p(size) &&
 		size <= PAGE_SIZE && !(flags & SLUB_DMA)) {
 			struct kmem_cache *s = kmalloc_slab(size);
@@ -240,7 +281,13 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 		if (!s)
 			return ZERO_SIZE_PTR;
 
-		return kmem_cache_alloc_node(s, flags, node);
+		ret = kmem_cache_alloc_node_notrace(s, flags, node);
+
+		kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+					  _THIS_IP_, ret,
+					  size, s->size, flags, node);
+
+		return ret;
 	}
 	return __kmalloc_node(size, flags, node);
 }
diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h
new file mode 100644
index 0000000..ad8b785
--- /dev/null
+++ b/include/trace/kmemtrace.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2008 Eduard - Gabriel Munteanu
+ *
+ * This file is released under GPL version 2.
+ */
+
+#ifndef _LINUX_KMEMTRACE_H
+#define _LINUX_KMEMTRACE_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/marker.h>
+
+enum kmemtrace_type_id {
+	KMEMTRACE_TYPE_KMALLOC = 0,	/* kmalloc() or kfree(). */
+	KMEMTRACE_TYPE_CACHE,		/* kmem_cache_*(). */
+	KMEMTRACE_TYPE_PAGES,		/* __get_free_pages() and friends. */
+};
+
+#ifdef CONFIG_KMEMTRACE
+
+extern void kmemtrace_init(void);
+
+extern void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
+					     unsigned long call_site,
+					     const void *ptr,
+					     size_t bytes_req,
+					     size_t bytes_alloc,
+					     gfp_t gfp_flags,
+					     int node);
+
+extern void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
+				       unsigned long call_site,
+				       const void *ptr);
+
+#else /* CONFIG_KMEMTRACE */
+
+static inline void kmemtrace_init(void)
+{
+}
+
+static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
+					     unsigned long call_site,
+					     const void *ptr,
+					     size_t bytes_req,
+					     size_t bytes_alloc,
+					     gfp_t gfp_flags,
+					     int node)
+{
+}
+
+static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
+				       unsigned long call_site,
+				       const void *ptr)
+{
+}
+
+#endif /* CONFIG_KMEMTRACE */
+
+static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id,
+					unsigned long call_site,
+					const void *ptr,
+					size_t bytes_req,
+					size_t bytes_alloc,
+					gfp_t gfp_flags)
+{
+	kmemtrace_mark_alloc_node(type_id, call_site, ptr,
+				  bytes_req, bytes_alloc, gfp_flags, -1);
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_KMEMTRACE_H */
+
diff --git a/init/main.c b/init/main.c
index 8442094..db7974f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,7 @@
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
+#include <trace/kmemtrace.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/smp.h>
@@ -641,6 +642,7 @@ asmlinkage void __init start_kernel(void)
 	enable_debug_pagealloc();
 	cpu_hotplug_init();
 	kmem_cache_init();
+	kmemtrace_init();
 	debug_objects_mem_init();
 	idr_init_cache();
 	setup_per_cpu_pageset();
diff --git a/kernel/relay.c b/kernel/relay.c
index 09ac200..d064506 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -675,9 +675,7 @@ int relay_late_setup_files(struct rchan *chan,
 	 */
 	for_each_online_cpu(i) {
 		if (unlikely(!chan->buf[i])) {
-			printk(KERN_ERR "relay_late_setup_files: CPU %u "
-					"has no buffer, it must have!\n", i);
-			BUG();
+			WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n");
 			err = -EINVAL;
 			break;
 		}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e2a4ff6..9442392 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -264,6 +264,27 @@ config HW_BRANCH_TRACER
 	  This tracer records all branches on the system in a circular
 	  buffer giving access to the last N branches for each cpu.
 
+config KMEMTRACE
+	bool "Trace SLAB allocations"
+	select TRACING
+	help
+	  kmemtrace provides tracing for slab allocator functions, such as
+	  kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
+	  data is then fed to the userspace application in order to analyse
+	  allocation hotspots, internal fragmentation and so on, making it
+	  possible to see how well an allocator performs, as well as debug
+	  and profile kernel code.
+
+	  This requires an userspace application to use. See
+	  Documentation/vm/kmemtrace.txt for more information.
+
+	  Saying Y will make the kernel somewhat larger and slower. However,
+	  if you disable kmemtrace at run-time or boot-time, the performance
+	  impact is minimal (depending on the arch the kernel is built for).
+
+	  If unsure, say N.
+
+
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
 	depends on FUNCTION_TRACER
@@ -302,4 +323,27 @@ config FTRACE_STARTUP_TEST
 	  functioning properly. It will do tests on all the configured
 	  tracers of ftrace.
 
+config MMIOTRACE
+	bool "Memory mapped IO tracing"
+	depends on HAVE_MMIOTRACE_SUPPORT && DEBUG_KERNEL && PCI
+	select TRACING
+	help
+	  Mmiotrace traces Memory Mapped I/O access and is meant for
+	  debugging and reverse engineering. It is called from the ioremap
+	  implementation and works via page faults. Tracing is disabled by
+	  default and can be enabled at run-time.
+
+	  See Documentation/tracers/mmiotrace.txt.
+	  If you are not helping to develop drivers, say N.
+
+config MMIOTRACE_TEST
+	tristate "Test module for mmiotrace"
+	depends on MMIOTRACE && m
+	help
+	  This is a dumb module for testing mmiotrace. It is very dangerous
+	  as it will write garbage to IO memory starting at a given address.
+	  However, it should be safe to use on e.g. unused portion of VRAM.
+
+	  Say N, unless you absolutely know what you are doing.
+
 endmenu
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 349d5a9..05c9182 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,6 +19,8 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
+obj-$(CONFIG_TRACING) += trace_output.o
+obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
 obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
@@ -33,5 +35,6 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
 obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
 obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
 obj-$(CONFIG_POWER_TRACER) += trace_power.o
+obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2f32969..9e54a6c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -289,7 +289,7 @@ static DEFINE_MUTEX(ftrace_regex_lock);
 
 struct ftrace_page {
 	struct ftrace_page	*next;
-	unsigned long		index;
+	int			index;
 	struct dyn_ftrace	records[];
 };
 
@@ -786,7 +786,7 @@ enum {
 
 struct ftrace_iterator {
 	struct ftrace_page	*pg;
-	unsigned		idx;
+	int			idx;
 	unsigned		flags;
 	unsigned char		buffer[FTRACE_BUFF_MAX+1];
 	unsigned		buffer_idx;
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
new file mode 100644
index 0000000..faaa5ae
--- /dev/null
+++ b/kernel/trace/kmemtrace.c
@@ -0,0 +1,350 @@
+/*
+ * Memory allocator tracing
+ *
+ * Copyright (C) 2008 Eduard - Gabriel Munteanu
+ * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ */
+
+#include <linux/dcache.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <trace/kmemtrace.h>
+
+#include "trace.h"
+#include "trace_output.h"
+
+/* Select an alternative, minimalistic output than the original one */
+#define TRACE_KMEM_OPT_MINIMAL	0x1
+
+static struct tracer_opt kmem_opts[] = {
+	/* Default disable the minimalistic output */
+	{ TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
+	{ }
+};
+
+static struct tracer_flags kmem_tracer_flags = {
+	.val = 0,
+	.opts = kmem_opts
+};
+
+
+static bool kmem_tracing_enabled __read_mostly;
+static struct trace_array *kmemtrace_array;
+
+static int kmem_trace_init(struct trace_array *tr)
+{
+	int cpu;
+	kmemtrace_array = tr;
+
+	for_each_cpu_mask(cpu, cpu_possible_map)
+		tracing_reset(tr, cpu);
+
+	kmem_tracing_enabled = true;
+
+	return 0;
+}
+
+static void kmem_trace_reset(struct trace_array *tr)
+{
+	kmem_tracing_enabled = false;
+}
+
+static void kmemtrace_headers(struct seq_file *s)
+{
+	/* Don't need headers for the original kmemtrace output */
+	if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
+		return;
+
+	seq_printf(s, "#\n");
+	seq_printf(s, "# ALLOC  TYPE  REQ   GIVEN  FLAGS     "
+			"      POINTER         NODE    CALLER\n");
+	seq_printf(s, "# FREE   |      |     |       |       "
+			"       |   |            |        |\n");
+	seq_printf(s, "# |\n\n");
+}
+
+/*
+ * The two following functions give the original output from kmemtrace,
+ * or something close to....perhaps they need some missing things
+ */
+static enum print_line_t
+kmemtrace_print_alloc_original(struct trace_iterator *iter,
+				struct kmemtrace_alloc_entry *entry)
+{
+	struct trace_seq *s = &iter->seq;
+	int ret;
+
+	/* Taken from the old linux/kmemtrace.h */
+	ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu "
+	  "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
+	   entry->type_id, entry->call_site, (unsigned long) entry->ptr,
+	   (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc,
+	   (unsigned long) entry->gfp_flags, entry->node);
+
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+kmemtrace_print_free_original(struct trace_iterator *iter,
+				struct kmemtrace_free_entry *entry)
+{
+	struct trace_seq *s = &iter->seq;
+	int ret;
+
+	/* Taken from the old linux/kmemtrace.h */
+	ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n",
+	   entry->type_id, entry->call_site, (unsigned long) entry->ptr);
+
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+
+/* The two other following provide a more minimalistic output */
+static enum print_line_t
+kmemtrace_print_alloc_compress(struct trace_iterator *iter,
+					struct kmemtrace_alloc_entry *entry)
+{
+	struct trace_seq *s = &iter->seq;
+	int ret;
+
+	/* Alloc entry */
+	ret = trace_seq_printf(s, "  +      ");
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Type */
+	switch (entry->type_id) {
+	case KMEMTRACE_TYPE_KMALLOC:
+		ret = trace_seq_printf(s, "K   ");
+		break;
+	case KMEMTRACE_TYPE_CACHE:
+		ret = trace_seq_printf(s, "C   ");
+		break;
+	case KMEMTRACE_TYPE_PAGES:
+		ret = trace_seq_printf(s, "P   ");
+		break;
+	default:
+		ret = trace_seq_printf(s, "?   ");
+	}
+
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Requested */
+	ret = trace_seq_printf(s, "%4d   ", entry->bytes_req);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Allocated */
+	ret = trace_seq_printf(s, "%4d   ", entry->bytes_alloc);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Flags
+	 * TODO: would be better to see the name of the GFP flag names
+	 */
+	ret = trace_seq_printf(s, "%08x   ", entry->gfp_flags);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Pointer to allocated */
+	ret = trace_seq_printf(s, "0x%tx   ", (ptrdiff_t)entry->ptr);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Node */
+	ret = trace_seq_printf(s, "%4d   ", entry->node);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Call site */
+	ret = seq_print_ip_sym(s, entry->call_site, 0);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	if (!trace_seq_printf(s, "\n"))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+kmemtrace_print_free_compress(struct trace_iterator *iter,
+				struct kmemtrace_free_entry *entry)
+{
+	struct trace_seq *s = &iter->seq;
+	int ret;
+
+	/* Free entry */
+	ret = trace_seq_printf(s, "  -      ");
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Type */
+	switch (entry->type_id) {
+	case KMEMTRACE_TYPE_KMALLOC:
+		ret = trace_seq_printf(s, "K     ");
+		break;
+	case KMEMTRACE_TYPE_CACHE:
+		ret = trace_seq_printf(s, "C     ");
+		break;
+	case KMEMTRACE_TYPE_PAGES:
+		ret = trace_seq_printf(s, "P     ");
+		break;
+	default:
+		ret = trace_seq_printf(s, "?     ");
+	}
+
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Skip requested/allocated/flags */
+	ret = trace_seq_printf(s, "                       ");
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Pointer to allocated */
+	ret = trace_seq_printf(s, "0x%tx   ", (ptrdiff_t)entry->ptr);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Skip node */
+	ret = trace_seq_printf(s, "       ");
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Call site */
+	ret = seq_print_ip_sym(s, entry->call_site, 0);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	if (!trace_seq_printf(s, "\n"))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
+{
+	struct trace_entry *entry = iter->ent;
+
+	switch (entry->type) {
+	case TRACE_KMEM_ALLOC: {
+		struct kmemtrace_alloc_entry *field;
+		trace_assign_type(field, entry);
+		if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
+			return kmemtrace_print_alloc_compress(iter, field);
+		else
+			return kmemtrace_print_alloc_original(iter, field);
+	}
+
+	case TRACE_KMEM_FREE: {
+		struct kmemtrace_free_entry *field;
+		trace_assign_type(field, entry);
+		if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
+			return kmemtrace_print_free_compress(iter, field);
+		else
+			return kmemtrace_print_free_original(iter, field);
+	}
+
+	default:
+		return TRACE_TYPE_UNHANDLED;
+	}
+}
+
+/* Trace allocations */
+void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
+			     unsigned long call_site,
+			     const void *ptr,
+			     size_t bytes_req,
+			     size_t bytes_alloc,
+			     gfp_t gfp_flags,
+			     int node)
+{
+	struct ring_buffer_event *event;
+	struct kmemtrace_alloc_entry *entry;
+	struct trace_array *tr = kmemtrace_array;
+	unsigned long irq_flags;
+
+	if (!kmem_tracing_enabled)
+		return;
+
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, 0, 0);
+
+	entry->ent.type = TRACE_KMEM_ALLOC;
+	entry->call_site = call_site;
+	entry->ptr = ptr;
+	entry->bytes_req = bytes_req;
+	entry->bytes_alloc = bytes_alloc;
+	entry->gfp_flags = gfp_flags;
+	entry->node	=	node;
+
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+	trace_wake_up();
+}
+EXPORT_SYMBOL(kmemtrace_mark_alloc_node);
+
+void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
+		       unsigned long call_site,
+		       const void *ptr)
+{
+	struct ring_buffer_event *event;
+	struct kmemtrace_free_entry *entry;
+	struct trace_array *tr = kmemtrace_array;
+	unsigned long irq_flags;
+
+	if (!kmem_tracing_enabled)
+		return;
+
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, 0, 0);
+
+	entry->ent.type = TRACE_KMEM_FREE;
+	entry->type_id	= type_id;
+	entry->call_site = call_site;
+	entry->ptr = ptr;
+
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+	trace_wake_up();
+}
+EXPORT_SYMBOL(kmemtrace_mark_free);
+
+static struct tracer kmem_tracer __read_mostly = {
+	.name		= "kmemtrace",
+	.init		= kmem_trace_init,
+	.reset		= kmem_trace_reset,
+	.print_line	= kmemtrace_print_line,
+	.print_header = kmemtrace_headers,
+	.flags		= &kmem_tracer_flags
+};
+
+void kmemtrace_init(void)
+{
+	/* earliest opportunity to start kmem tracing */
+}
+
+static int __init init_kmem_tracer(void)
+{
+	return register_tracer(&kmem_tracer);
+}
+
+device_initcall(init_kmem_tracer);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8b0daf0..4832ffa 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -123,8 +123,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
 EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
 
 #define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
-#define RB_ALIGNMENT_SHIFT	2
-#define RB_ALIGNMENT		(1 << RB_ALIGNMENT_SHIFT)
+#define RB_ALIGNMENT		4U
 #define RB_MAX_SMALL_DATA	28
 
 enum {
@@ -133,7 +132,7 @@ enum {
 };
 
 /* inline for ring buffer fast paths */
-static inline unsigned
+static unsigned
 rb_event_length(struct ring_buffer_event *event)
 {
 	unsigned length;
@@ -151,7 +150,7 @@ rb_event_length(struct ring_buffer_event *event)
 
 	case RINGBUF_TYPE_DATA:
 		if (event->len)
-			length = event->len << RB_ALIGNMENT_SHIFT;
+			length = event->len * RB_ALIGNMENT;
 		else
 			length = event->array[0];
 		return length + RB_EVNT_HDR_SIZE;
@@ -179,7 +178,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 
 /* inline for ring buffer fast paths */
-static inline void *
+static void *
 rb_event_data(struct ring_buffer_event *event)
 {
 	BUG_ON(event->type != RINGBUF_TYPE_DATA);
@@ -229,10 +228,9 @@ static void rb_init_page(struct buffer_data_page *bpage)
  * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
  * this issue out.
  */
-static inline void free_buffer_page(struct buffer_page *bpage)
+static void free_buffer_page(struct buffer_page *bpage)
 {
-	if (bpage->page)
-		free_page((unsigned long)bpage->page);
+	free_page((unsigned long)bpage->page);
 	kfree(bpage);
 }
 
@@ -811,7 +809,7 @@ rb_event_index(struct ring_buffer_event *event)
 	return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
 }
 
-static inline int
+static int
 rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
 	     struct ring_buffer_event *event)
 {
@@ -825,7 +823,7 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
 		rb_commit_index(cpu_buffer) == index;
 }
 
-static inline void
+static void
 rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
 		    struct ring_buffer_event *event)
 {
@@ -850,7 +848,7 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
 	local_set(&cpu_buffer->commit_page->page->commit, index);
 }
 
-static inline void
+static void
 rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	/*
@@ -896,7 +894,7 @@ static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->reader_page->read = 0;
 }
 
-static inline void rb_inc_iter(struct ring_buffer_iter *iter)
+static void rb_inc_iter(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 
@@ -926,7 +924,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
  * and with this, we can determine what to place into the
  * data field.
  */
-static inline void
+static void
 rb_update_event(struct ring_buffer_event *event,
 			 unsigned type, unsigned length)
 {
@@ -938,15 +936,11 @@ rb_update_event(struct ring_buffer_event *event,
 		break;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
-		event->len =
-			(RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
-			>> RB_ALIGNMENT_SHIFT;
+		event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
 		break;
 
 	case RINGBUF_TYPE_TIME_STAMP:
-		event->len =
-			(RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
-			>> RB_ALIGNMENT_SHIFT;
+		event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
 		break;
 
 	case RINGBUF_TYPE_DATA:
@@ -955,16 +949,14 @@ rb_update_event(struct ring_buffer_event *event,
 			event->len = 0;
 			event->array[0] = length;
 		} else
-			event->len =
-				(length + (RB_ALIGNMENT-1))
-				>> RB_ALIGNMENT_SHIFT;
+			event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
 		break;
 	default:
 		BUG();
 	}
 }
 
-static inline unsigned rb_calculate_event_length(unsigned length)
+static unsigned rb_calculate_event_length(unsigned length)
 {
 	struct ring_buffer_event event; /* Used only for sizeof array */
 
@@ -1438,7 +1430,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 }
 EXPORT_SYMBOL_GPL(ring_buffer_write);
 
-static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *reader = cpu_buffer->reader_page;
 	struct buffer_page *head = cpu_buffer->head_page;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c580233..0418fc3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -37,6 +37,7 @@
 #include <linux/irqflags.h>
 
 #include "trace.h"
+#include "trace_output.h"
 
 #define TRACE_BUFFER_FLAGS	(RB_FL_OVERWRITE)
 
@@ -329,132 +330,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	tracing_record_cmdline(current);
 }
 
-/**
- * trace_seq_printf - sequence printing of trace information
- * @s: trace sequence descriptor
- * @fmt: printf format string
- *
- * The tracer may use either sequence operations or its own
- * copy to user routines. To simplify formating of a trace
- * trace_seq_printf is used to store strings into a special
- * buffer (@s). Then the output may be either used by
- * the sequencer or pulled into another buffer.
- */
-int
-trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
-{
-	int len = (PAGE_SIZE - 1) - s->len;
-	va_list ap;
-	int ret;
-
-	if (!len)
-		return 0;
-
-	va_start(ap, fmt);
-	ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
-	va_end(ap);
-
-	/* If we can't write it all, don't bother writing anything */
-	if (ret >= len)
-		return 0;
-
-	s->len += ret;
-
-	return len;
-}
-
-/**
- * trace_seq_puts - trace sequence printing of simple string
- * @s: trace sequence descriptor
- * @str: simple string to record
- *
- * The tracer may use either the sequence operations or its own
- * copy to user routines. This function records a simple string
- * into a special buffer (@s) for later retrieval by a sequencer
- * or other mechanism.
- */
-static int
-trace_seq_puts(struct trace_seq *s, const char *str)
-{
-	int len = strlen(str);
-
-	if (len > ((PAGE_SIZE - 1) - s->len))
-		return 0;
-
-	memcpy(s->buffer + s->len, str, len);
-	s->len += len;
-
-	return len;
-}
-
-static int
-trace_seq_putc(struct trace_seq *s, unsigned char c)
-{
-	if (s->len >= (PAGE_SIZE - 1))
-		return 0;
-
-	s->buffer[s->len++] = c;
-
-	return 1;
-}
-
-static int
-trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
-{
-	if (len > ((PAGE_SIZE - 1) - s->len))
-		return 0;
-
-	memcpy(s->buffer + s->len, mem, len);
-	s->len += len;
-
-	return len;
-}
-
-#define MAX_MEMHEX_BYTES	8
-#define HEX_CHARS		(MAX_MEMHEX_BYTES*2 + 1)
-
-static int
-trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
-{
-	unsigned char hex[HEX_CHARS];
-	unsigned char *data = mem;
-	int i, j;
-
-#ifdef __BIG_ENDIAN
-	for (i = 0, j = 0; i < len; i++) {
-#else
-	for (i = len-1, j = 0; i >= 0; i--) {
-#endif
-		hex[j++] = hex_asc_hi(data[i]);
-		hex[j++] = hex_asc_lo(data[i]);
-	}
-	hex[j++] = ' ';
-
-	return trace_seq_putmem(s, hex, j);
-}
-
-static int
-trace_seq_path(struct trace_seq *s, struct path *path)
-{
-	unsigned char *p;
-
-	if (s->len >= (PAGE_SIZE - 1))
-		return 0;
-	p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
-	if (!IS_ERR(p)) {
-		p = mangle_path(s->buffer + s->len, p, "\n");
-		if (p) {
-			s->len = p - s->buffer;
-			return 1;
-		}
-	} else {
-		s->buffer[s->len++] = '?';
-		return 1;
-	}
-
-	return 0;
-}
-
 static void
 trace_seq_reset(struct trace_seq *s)
 {
@@ -1472,154 +1347,6 @@ static void s_stop(struct seq_file *m, void *p)
 	mutex_unlock(&trace_types_lock);
 }
 
-#ifdef CONFIG_KRETPROBES
-static inline const char *kretprobed(const char *name)
-{
-	static const char tramp_name[] = "kretprobe_trampoline";
-	int size = sizeof(tramp_name);
-
-	if (strncmp(tramp_name, name, size) == 0)
-		return "[unknown/kretprobe'd]";
-	return name;
-}
-#else
-static inline const char *kretprobed(const char *name)
-{
-	return name;
-}
-#endif /* CONFIG_KRETPROBES */
-
-static int
-seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
-{
-#ifdef CONFIG_KALLSYMS
-	char str[KSYM_SYMBOL_LEN];
-	const char *name;
-
-	kallsyms_lookup(address, NULL, NULL, NULL, str);
-
-	name = kretprobed(str);
-
-	return trace_seq_printf(s, fmt, name);
-#endif
-	return 1;
-}
-
-static int
-seq_print_sym_offset(struct trace_seq *s, const char *fmt,
-		     unsigned long address)
-{
-#ifdef CONFIG_KALLSYMS
-	char str[KSYM_SYMBOL_LEN];
-	const char *name;
-
-	sprint_symbol(str, address);
-	name = kretprobed(str);
-
-	return trace_seq_printf(s, fmt, name);
-#endif
-	return 1;
-}
-
-#ifndef CONFIG_64BIT
-# define IP_FMT "%08lx"
-#else
-# define IP_FMT "%016lx"
-#endif
-
-int
-seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
-{
-	int ret;
-
-	if (!ip)
-		return trace_seq_printf(s, "0");
-
-	if (sym_flags & TRACE_ITER_SYM_OFFSET)
-		ret = seq_print_sym_offset(s, "%s", ip);
-	else
-		ret = seq_print_sym_short(s, "%s", ip);
-
-	if (!ret)
-		return 0;
-
-	if (sym_flags & TRACE_ITER_SYM_ADDR)
-		ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
-	return ret;
-}
-
-static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
-				    unsigned long ip, unsigned long sym_flags)
-{
-	struct file *file = NULL;
-	unsigned long vmstart = 0;
-	int ret = 1;
-
-	if (mm) {
-		const struct vm_area_struct *vma;
-
-		down_read(&mm->mmap_sem);
-		vma = find_vma(mm, ip);
-		if (vma) {
-			file = vma->vm_file;
-			vmstart = vma->vm_start;
-		}
-		if (file) {
-			ret = trace_seq_path(s, &file->f_path);
-			if (ret)
-				ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
-		}
-		up_read(&mm->mmap_sem);
-	}
-	if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
-		ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
-	return ret;
-}
-
-static int
-seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
-		      unsigned long sym_flags)
-{
-	struct mm_struct *mm = NULL;
-	int ret = 1;
-	unsigned int i;
-
-	if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
-		struct task_struct *task;
-		/*
-		 * we do the lookup on the thread group leader,
-		 * since individual threads might have already quit!
-		 */
-		rcu_read_lock();
-		task = find_task_by_vpid(entry->ent.tgid);
-		if (task)
-			mm = get_task_mm(task);
-		rcu_read_unlock();
-	}
-
-	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
-		unsigned long ip = entry->caller[i];
-
-		if (ip == ULONG_MAX || !ret)
-			break;
-		if (i && ret)
-			ret = trace_seq_puts(s, " <- ");
-		if (!ip) {
-			if (ret)
-				ret = trace_seq_puts(s, "??");
-			continue;
-		}
-		if (!ret)
-			break;
-		if (ret)
-			ret = seq_print_user_ip(s, mm, ip, sym_flags);
-	}
-
-	if (mm)
-		mmput(mm);
-	return ret;
-}
-
 static void print_lat_help_header(struct seq_file *m)
 {
 	seq_puts(m, "#                  _------=> CPU#            \n");
@@ -1755,52 +1482,6 @@ lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
 		trace_seq_puts(s, " : ");
 }
 
-static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
-
-static int task_state_char(unsigned long state)
-{
-	int bit = state ? __ffs(state) + 1 : 0;
-
-	return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
-}
-
-/*
- * The message is supposed to contain an ending newline.
- * If the printing stops prematurely, try to add a newline of our own.
- */
-void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
-{
-	struct trace_entry *ent;
-	struct trace_field_cont *cont;
-	bool ok = true;
-
-	ent = peek_next_entry(iter, iter->cpu, NULL);
-	if (!ent || ent->type != TRACE_CONT) {
-		trace_seq_putc(s, '\n');
-		return;
-	}
-
-	do {
-		cont = (struct trace_field_cont *)ent;
-		if (ok)
-			ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
-
-		ftrace_disable_cpu();
-
-		if (iter->buffer_iter[iter->cpu])
-			ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
-		else
-			ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
-
-		ftrace_enable_cpu();
-
-		ent = peek_next_entry(iter, iter->cpu, NULL);
-	} while (ent && ent->type == TRACE_CONT);
-
-	if (!ok)
-		trace_seq_putc(s, '\n');
-}
-
 static void test_cpu_buff_start(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
@@ -1824,17 +1505,14 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	struct trace_seq *s = &iter->seq;
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
 	struct trace_entry *next_entry;
+	struct trace_event *event;
 	unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
 	struct trace_entry *entry = iter->ent;
 	unsigned long abs_usecs;
 	unsigned long rel_usecs;
 	u64 next_ts;
 	char *comm;
-	int S, T;
-	int i;
-
-	if (entry->type == TRACE_CONT)
-		return TRACE_TYPE_HANDLED;
+	int ret;
 
 	test_cpu_buff_start(iter);
 
@@ -1859,96 +1537,16 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 		lat_print_generic(s, entry, cpu);
 		lat_print_timestamp(s, abs_usecs, rel_usecs);
 	}
-	switch (entry->type) {
-	case TRACE_FN: {
-		struct ftrace_entry *field;
-
-		trace_assign_type(field, entry);
-
-		seq_print_ip_sym(s, field->ip, sym_flags);
-		trace_seq_puts(s, " (");
-		seq_print_ip_sym(s, field->parent_ip, sym_flags);
-		trace_seq_puts(s, ")\n");
-		break;
-	}
-	case TRACE_CTX:
-	case TRACE_WAKE: {
-		struct ctx_switch_entry *field;
-
-		trace_assign_type(field, entry);
-
-		T = task_state_char(field->next_state);
-		S = task_state_char(field->prev_state);
-		comm = trace_find_cmdline(field->next_pid);
-		trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
-				 field->prev_pid,
-				 field->prev_prio,
-				 S, entry->type == TRACE_CTX ? "==>" : "  +",
-				 field->next_cpu,
-				 field->next_pid,
-				 field->next_prio,
-				 T, comm);
-		break;
-	}
-	case TRACE_SPECIAL: {
-		struct special_entry *field;
-
-		trace_assign_type(field, entry);
-
-		trace_seq_printf(s, "# %ld %ld %ld\n",
-				 field->arg1,
-				 field->arg2,
-				 field->arg3);
-		break;
-	}
-	case TRACE_STACK: {
-		struct stack_entry *field;
-
-		trace_assign_type(field, entry);
-
-		for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
-			if (i)
-				trace_seq_puts(s, " <= ");
-			seq_print_ip_sym(s, field->caller[i], sym_flags);
-		}
-		trace_seq_puts(s, "\n");
-		break;
-	}
-	case TRACE_PRINT: {
-		struct print_entry *field;
-
-		trace_assign_type(field, entry);
 
-		seq_print_ip_sym(s, field->ip, sym_flags);
-		trace_seq_printf(s, ": %s", field->buf);
-		if (entry->flags & TRACE_FLAG_CONT)
-			trace_seq_print_cont(s, iter);
-		break;
-	}
-	case TRACE_BRANCH: {
-		struct trace_branch *field;
-
-		trace_assign_type(field, entry);
-
-		trace_seq_printf(s, "[%s] %s:%s:%d\n",
-				 field->correct ? "  ok  " : " MISS ",
-				 field->func,
-				 field->file,
-				 field->line);
-		break;
+	event = ftrace_find_event(entry->type);
+	if (event && event->latency_trace) {
+		ret = event->latency_trace(s, entry, sym_flags);
+		if (ret)
+			return ret;
+		return TRACE_TYPE_HANDLED;
 	}
-	case TRACE_USER_STACK: {
-		struct userstack_entry *field;
-
-		trace_assign_type(field, entry);
 
-		seq_print_userip_objs(field, s, sym_flags);
-		trace_seq_putc(s, '\n');
-		break;
-	}
-	default:
-		trace_seq_printf(s, "Unknown type %d\n", entry->type);
-	}
+	trace_seq_printf(s, "Unknown type %d\n", entry->type);
 	return TRACE_TYPE_HANDLED;
 }
 
@@ -1957,19 +1555,15 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 	struct trace_seq *s = &iter->seq;
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
 	struct trace_entry *entry;
+	struct trace_event *event;
 	unsigned long usec_rem;
 	unsigned long long t;
 	unsigned long secs;
 	char *comm;
 	int ret;
-	int S, T;
-	int i;
 
 	entry = iter->ent;
 
-	if (entry->type == TRACE_CONT)
-		return TRACE_TYPE_HANDLED;
-
 	test_cpu_buff_start(iter);
 
 	comm = trace_find_cmdline(iter->ent->pid);
@@ -1988,129 +1582,17 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
-	switch (entry->type) {
-	case TRACE_FN: {
-		struct ftrace_entry *field;
-
-		trace_assign_type(field, entry);
-
-		ret = seq_print_ip_sym(s, field->ip, sym_flags);
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
-		if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
-						field->parent_ip) {
-			ret = trace_seq_printf(s, " <-");
-			if (!ret)
-				return TRACE_TYPE_PARTIAL_LINE;
-			ret = seq_print_ip_sym(s,
-					       field->parent_ip,
-					       sym_flags);
-			if (!ret)
-				return TRACE_TYPE_PARTIAL_LINE;
-		}
-		ret = trace_seq_printf(s, "\n");
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
-		break;
-	}
-	case TRACE_CTX:
-	case TRACE_WAKE: {
-		struct ctx_switch_entry *field;
-
-		trace_assign_type(field, entry);
-
-		T = task_state_char(field->next_state);
-		S = task_state_char(field->prev_state);
-		ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
-				       field->prev_pid,
-				       field->prev_prio,
-				       S,
-				       entry->type == TRACE_CTX ? "==>" : "  +",
-				       field->next_cpu,
-				       field->next_pid,
-				       field->next_prio,
-				       T);
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
-		break;
-	}
-	case TRACE_SPECIAL: {
-		struct special_entry *field;
-
-		trace_assign_type(field, entry);
-
-		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-				 field->arg1,
-				 field->arg2,
-				 field->arg3);
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
-		break;
-	}
-	case TRACE_STACK: {
-		struct stack_entry *field;
-
-		trace_assign_type(field, entry);
-
-		for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
-			if (i) {
-				ret = trace_seq_puts(s, " <= ");
-				if (!ret)
-					return TRACE_TYPE_PARTIAL_LINE;
-			}
-			ret = seq_print_ip_sym(s, field->caller[i],
-					       sym_flags);
-			if (!ret)
-				return TRACE_TYPE_PARTIAL_LINE;
-		}
-		ret = trace_seq_puts(s, "\n");
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
-		break;
-	}
-	case TRACE_PRINT: {
-		struct print_entry *field;
-
-		trace_assign_type(field, entry);
-
-		seq_print_ip_sym(s, field->ip, sym_flags);
-		trace_seq_printf(s, ": %s", field->buf);
-		if (entry->flags & TRACE_FLAG_CONT)
-			trace_seq_print_cont(s, iter);
-		break;
-	}
-	case TRACE_GRAPH_RET: {
-		return print_graph_function(iter);
-	}
-	case TRACE_GRAPH_ENT: {
-		return print_graph_function(iter);
-	}
-	case TRACE_BRANCH: {
-		struct trace_branch *field;
-
-		trace_assign_type(field, entry);
-
-		trace_seq_printf(s, "[%s] %s:%s:%d\n",
-				 field->correct ? "  ok  " : " MISS ",
-				 field->func,
-				 field->file,
-				 field->line);
-		break;
+	event = ftrace_find_event(entry->type);
+	if (event && event->trace) {
+		ret = event->trace(s, entry, sym_flags);
+		if (ret)
+			return ret;
+		return TRACE_TYPE_HANDLED;
 	}
-	case TRACE_USER_STACK: {
-		struct userstack_entry *field;
-
-		trace_assign_type(field, entry);
+	ret = trace_seq_printf(s, "Unknown type %d\n", entry->type);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
 
-		ret = seq_print_userip_objs(field, s, sym_flags);
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
-		ret = trace_seq_putc(s, '\n');
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
-		break;
-	}
-	}
 	return TRACE_TYPE_HANDLED;
 }
 
@@ -2118,152 +1600,47 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
+	struct trace_event *event;
 	int ret;
-	int S, T;
 
 	entry = iter->ent;
 
-	if (entry->type == TRACE_CONT)
-		return TRACE_TYPE_HANDLED;
-
 	ret = trace_seq_printf(s, "%d %d %llu ",
 		entry->pid, iter->cpu, iter->ts);
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
-	switch (entry->type) {
-	case TRACE_FN: {
-		struct ftrace_entry *field;
-
-		trace_assign_type(field, entry);
-
-		ret = trace_seq_printf(s, "%x %x\n",
-					field->ip,
-					field->parent_ip);
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
-		break;
-	}
-	case TRACE_CTX:
-	case TRACE_WAKE: {
-		struct ctx_switch_entry *field;
-
-		trace_assign_type(field, entry);
-
-		T = task_state_char(field->next_state);
-		S = entry->type == TRACE_WAKE ? '+' :
-			task_state_char(field->prev_state);
-		ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
-				       field->prev_pid,
-				       field->prev_prio,
-				       S,
-				       field->next_cpu,
-				       field->next_pid,
-				       field->next_prio,
-				       T);
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
-		break;
-	}
-	case TRACE_SPECIAL:
-	case TRACE_USER_STACK:
-	case TRACE_STACK: {
-		struct special_entry *field;
-
-		trace_assign_type(field, entry);
-
-		ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-				 field->arg1,
-				 field->arg2,
-				 field->arg3);
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
-		break;
+	event = ftrace_find_event(entry->type);
+	if (event && event->raw) {
+		ret = event->raw(s, entry, 0);
+		if (ret)
+			return ret;
+		return TRACE_TYPE_HANDLED;
 	}
-	case TRACE_PRINT: {
-		struct print_entry *field;
-
-		trace_assign_type(field, entry);
+	ret = trace_seq_printf(s, "%d ?\n", entry->type);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
 
-		trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
-		if (entry->flags & TRACE_FLAG_CONT)
-			trace_seq_print_cont(s, iter);
-		break;
-	}
-	}
 	return TRACE_TYPE_HANDLED;
 }
 
-#define SEQ_PUT_FIELD_RET(s, x)				\
-do {							\
-	if (!trace_seq_putmem(s, &(x), sizeof(x)))	\
-		return 0;				\
-} while (0)
-
-#define SEQ_PUT_HEX_FIELD_RET(s, x)			\
-do {							\
-	BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES);	\
-	if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))	\
-		return 0;				\
-} while (0)
-
 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	unsigned char newline = '\n';
 	struct trace_entry *entry;
-	int S, T;
+	struct trace_event *event;
 
 	entry = iter->ent;
 
-	if (entry->type == TRACE_CONT)
-		return TRACE_TYPE_HANDLED;
-
 	SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
 	SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
 	SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
 
-	switch (entry->type) {
-	case TRACE_FN: {
-		struct ftrace_entry *field;
+	event = ftrace_find_event(entry->type);
+	if (event && event->hex)
+		event->hex(s, entry, 0);
 
-		trace_assign_type(field, entry);
-
-		SEQ_PUT_HEX_FIELD_RET(s, field->ip);
-		SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
-		break;
-	}
-	case TRACE_CTX:
-	case TRACE_WAKE: {
-		struct ctx_switch_entry *field;
-
-		trace_assign_type(field, entry);
-
-		T = task_state_char(field->next_state);
-		S = entry->type == TRACE_WAKE ? '+' :
-			task_state_char(field->prev_state);
-		SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
-		SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
-		SEQ_PUT_HEX_FIELD_RET(s, S);
-		SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
-		SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
-		SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
-		SEQ_PUT_HEX_FIELD_RET(s, T);
-		break;
-	}
-	case TRACE_SPECIAL:
-	case TRACE_USER_STACK:
-	case TRACE_STACK: {
-		struct special_entry *field;
-
-		trace_assign_type(field, entry);
-
-		SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
-		SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
-		SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
-		break;
-	}
-	}
 	SEQ_PUT_FIELD_RET(s, newline);
 
 	return TRACE_TYPE_HANDLED;
@@ -2282,9 +1659,6 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
-	if (entry->flags & TRACE_FLAG_CONT)
-		trace_seq_print_cont(s, iter);
-
 	return TRACE_TYPE_HANDLED;
 }
 
@@ -2292,53 +1666,19 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	struct trace_entry *entry;
+	struct trace_event *event;
 
 	entry = iter->ent;
 
-	if (entry->type == TRACE_CONT)
-		return TRACE_TYPE_HANDLED;
-
 	SEQ_PUT_FIELD_RET(s, entry->pid);
 	SEQ_PUT_FIELD_RET(s, entry->cpu);
 	SEQ_PUT_FIELD_RET(s, iter->ts);
 
-	switch (entry->type) {
-	case TRACE_FN: {
-		struct ftrace_entry *field;
-
-		trace_assign_type(field, entry);
-
-		SEQ_PUT_FIELD_RET(s, field->ip);
-		SEQ_PUT_FIELD_RET(s, field->parent_ip);
-		break;
-	}
-	case TRACE_CTX: {
-		struct ctx_switch_entry *field;
-
-		trace_assign_type(field, entry);
-
-		SEQ_PUT_FIELD_RET(s, field->prev_pid);
-		SEQ_PUT_FIELD_RET(s, field->prev_prio);
-		SEQ_PUT_FIELD_RET(s, field->prev_state);
-		SEQ_PUT_FIELD_RET(s, field->next_pid);
-		SEQ_PUT_FIELD_RET(s, field->next_prio);
-		SEQ_PUT_FIELD_RET(s, field->next_state);
-		break;
-	}
-	case TRACE_SPECIAL:
-	case TRACE_USER_STACK:
-	case TRACE_STACK: {
-		struct special_entry *field;
-
-		trace_assign_type(field, entry);
+	event = ftrace_find_event(entry->type);
+	if (event && event->binary)
+		event->binary(s, entry, 0);
 
-		SEQ_PUT_FIELD_RET(s, field->arg1);
-		SEQ_PUT_FIELD_RET(s, field->arg2);
-		SEQ_PUT_FIELD_RET(s, field->arg3);
-		break;
-	}
-	}
-	return 1;
+	return TRACE_TYPE_HANDLED;
 }
 
 static int trace_empty(struct trace_iterator *iter)
@@ -3013,6 +2353,7 @@ static int tracing_set_tracer(char *buf)
 		if (ret)
 			goto out;
 	}
+	init_tracer_stat(t);
 
 	trace_branch_enable(tr);
  out:
@@ -3877,7 +3218,7 @@ __init static int tracer_alloc_buffers(void)
 #else
 	current_trace = &nop_trace;
 #endif
-
+	init_tracer_stat(current_trace);
 	/* All seems OK, enable tracing */
 	tracing_disabled = 0;
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4d3d381..b3f9ad1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,6 +9,7 @@
 #include <linux/mmiotrace.h>
 #include <linux/ftrace.h>
 #include <trace/boot.h>
+#include <trace/kmemtrace.h>
 
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
@@ -16,7 +17,6 @@ enum trace_type {
 	TRACE_FN,
 	TRACE_CTX,
 	TRACE_WAKE,
-	TRACE_CONT,
 	TRACE_STACK,
 	TRACE_PRINT,
 	TRACE_SPECIAL,
@@ -29,9 +29,11 @@ enum trace_type {
 	TRACE_GRAPH_ENT,
 	TRACE_USER_STACK,
 	TRACE_HW_BRANCHES,
+	TRACE_KMEM_ALLOC,
+	TRACE_KMEM_FREE,
 	TRACE_POWER,
 
-	__TRACE_LAST_TYPE
+	__TRACE_LAST_TYPE,
 };
 
 /*
@@ -170,6 +172,24 @@ struct trace_power {
 	struct power_trace	state_data;
 };
 
+struct kmemtrace_alloc_entry {
+	struct trace_entry	ent;
+	enum kmemtrace_type_id type_id;
+	unsigned long call_site;
+	const void *ptr;
+	size_t bytes_req;
+	size_t bytes_alloc;
+	gfp_t gfp_flags;
+	int node;
+};
+
+struct kmemtrace_free_entry {
+	struct trace_entry	ent;
+	enum kmemtrace_type_id type_id;
+	unsigned long call_site;
+	const void *ptr;
+};
+
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
@@ -178,7 +198,6 @@ struct trace_power {
  *  NEED_RESCED		- reschedule is requested
  *  HARDIRQ		- inside an interrupt handler
  *  SOFTIRQ		- inside a softirq handler
- *  CONT		- multiple entries hold the trace item
  */
 enum trace_flag_type {
 	TRACE_FLAG_IRQS_OFF		= 0x01,
@@ -186,7 +205,6 @@ enum trace_flag_type {
 	TRACE_FLAG_NEED_RESCHED		= 0x04,
 	TRACE_FLAG_HARDIRQ		= 0x08,
 	TRACE_FLAG_SOFTIRQ		= 0x10,
-	TRACE_FLAG_CONT			= 0x20,
 };
 
 #define TRACE_BUF_SIZE		1024
@@ -262,7 +280,6 @@ extern void __ftrace_bad_type(void);
 	do {								\
 		IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN);	\
 		IF_ASSIGN(var, ent, struct ctx_switch_entry, 0);	\
-		IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
 		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
 		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
 		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
@@ -280,6 +297,10 @@ extern void __ftrace_bad_type(void);
 			  TRACE_GRAPH_RET);		\
 		IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
  		IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+		IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,	\
+			  TRACE_KMEM_ALLOC);	\
+		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
+			  TRACE_KMEM_FREE);	\
 		__ftrace_bad_type();					\
 	} while (0)
 
@@ -314,6 +335,25 @@ struct tracer_flags {
 #define TRACER_OPT(s, b)	.name = #s, .bit = b
 
 /*
+ * If you want to provide a stat file (one-shot statistics), fill
+ * an iterator with stat_start/stat_next and a stat_show callbacks.
+ * The others callbacks are optional.
+ */
+struct tracer_stat {
+	/* The name of your stat file */
+	const char		*name;
+	/* Iteration over statistic entries */
+	void			*(*stat_start)(void);
+	void			*(*stat_next)(void *prev, int idx);
+	/* Compare two entries for sorting (optional) for stats */
+	int			(*stat_cmp)(void *p1, void *p2);
+	/* Print a stat entry */
+	int			(*stat_show)(struct seq_file *s, void *p);
+	/* Print the headers of your stat entries */
+	int			(*stat_headers)(struct seq_file *s);
+};
+
+/*
  * A specific tracer, represented by methods that operate on a trace array:
  */
 struct tracer {
@@ -340,6 +380,7 @@ struct tracer {
 	struct tracer		*next;
 	int			print_max;
 	struct tracer_flags 	*flags;
+	struct tracer_stat	*stats;
 };
 
 struct trace_seq {
@@ -425,6 +466,8 @@ void tracing_start_sched_switch_record(void);
 int register_tracer(struct tracer *type);
 void unregister_tracer(struct tracer *type);
 
+void init_tracer_stat(struct tracer *trace);
+
 extern unsigned long nsecs_to_usecs(unsigned long nsecs);
 
 extern unsigned long tracing_max_latency;
@@ -456,10 +499,10 @@ struct tracer_switch_ops {
 	void				*private;
 	struct tracer_switch_ops	*next;
 };
-
-char *trace_find_cmdline(int pid);
 #endif /* CONFIG_CONTEXT_SWITCH_TRACER */
 
+extern char *trace_find_cmdline(int pid);
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern unsigned long ftrace_update_tot_cnt;
 #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
@@ -488,15 +531,6 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
-extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
-extern void trace_seq_print_cont(struct trace_seq *s,
-				 struct trace_iterator *iter);
-
-extern int
-seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
-		unsigned long sym_flags);
-extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
-				 size_t cnt);
 extern long ns2usecs(cycle_t nsec);
 extern int
 trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 366c8c3..0e94b3d 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -11,6 +11,7 @@
 #include <linux/kallsyms.h>
 
 #include "trace.h"
+#include "trace_output.h"
 
 static struct trace_array *boot_trace;
 static bool pre_initcalls_finished;
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 6c00feb..da5cf3e 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -14,12 +14,17 @@
 #include <linux/hash.h>
 #include <linux/fs.h>
 #include <asm/local.h>
+
 #include "trace.h"
+#include "trace_output.h"
+
+static struct tracer branch_trace;
 
 #ifdef CONFIG_BRANCH_TRACER
 
 static int branch_tracing_enabled __read_mostly;
 static DEFINE_MUTEX(branch_tracing_mutex);
+
 static struct trace_array *branch_tracer;
 
 static void
@@ -142,22 +147,50 @@ static void branch_trace_reset(struct trace_array *tr)
 	stop_branch_trace(tr);
 }
 
-struct tracer branch_trace __read_mostly =
+static int
+trace_print_print(struct trace_seq *s, struct trace_entry *entry, int flags)
 {
-	.name		= "branch",
-	.init		= branch_trace_init,
-	.reset		= branch_trace_reset,
-#ifdef CONFIG_FTRACE_SELFTEST
-	.selftest	= trace_selftest_startup_branch,
-#endif
-};
+	struct print_entry *field;
 
-__init static int init_branch_trace(void)
+	trace_assign_type(field, entry);
+
+	if (seq_print_ip_sym(s, field->ip, flags))
+		goto partial;
+
+	if (trace_seq_printf(s, ": %s", field->buf))
+		goto partial;
+
+ partial:
+	return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static int
+trace_branch_print(struct trace_seq *s, struct trace_entry *entry, int flags)
 {
-	return register_tracer(&branch_trace);
+	struct trace_branch *field;
+
+	trace_assign_type(field, entry);
+
+	if (trace_seq_printf(s, "[%s] %s:%s:%d\n",
+			     field->correct ? "  ok  " : " MISS ",
+			     field->func,
+			     field->file,
+			     field->line))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return 0;
 }
 
-device_initcall(init_branch_trace);
+
+static struct trace_event trace_branch_event = {
+	.type	 	= TRACE_BRANCH,
+	.trace		= trace_branch_print,
+	.latency_trace	= trace_branch_print,
+	.raw		= trace_nop_print,
+	.hex		= trace_nop_print,
+	.binary		= trace_nop_print,
+};
+
 #else
 static inline
 void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
@@ -183,66 +216,39 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
 }
 EXPORT_SYMBOL(ftrace_likely_update);
 
-struct ftrace_pointer {
-	void		*start;
-	void		*stop;
-	int		hit;
-};
+extern unsigned long __start_annotated_branch_profile[];
+extern unsigned long __stop_annotated_branch_profile[];
 
-static void *
-t_next(struct seq_file *m, void *v, loff_t *pos)
+static int annotated_branch_stat_headers(struct seq_file *m)
 {
-	const struct ftrace_pointer *f = m->private;
-	struct ftrace_branch_data *p = v;
-
-	(*pos)++;
-
-	if (v == (void *)1)
-		return f->start;
-
-	++p;
-
-	if ((void *)p >= (void *)f->stop)
-		return NULL;
-
-	return p;
+	seq_printf(m, " correct incorrect  %% ");
+	seq_printf(m, "       Function                "
+			      "  File              Line\n"
+			      " ------- ---------  - "
+			      "       --------                "
+			      "  ----              ----\n");
+	return 0;
 }
 
-static void *t_start(struct seq_file *m, loff_t *pos)
+static inline long get_incorrect_percent(struct ftrace_branch_data *p)
 {
-	void *t = (void *)1;
-	loff_t l = 0;
+	long percent;
 
-	for (; t && l < *pos; t = t_next(m, t, &l))
-		;
+	if (p->correct) {
+		percent = p->incorrect * 100;
+		percent /= p->correct + p->incorrect;
+	} else
+		percent = p->incorrect ? 100 : -1;
 
-	return t;
+	return percent;
 }
 
-static void t_stop(struct seq_file *m, void *p)
+static int branch_stat_show(struct seq_file *m, void *v)
 {
-}
-
-static int t_show(struct seq_file *m, void *v)
-{
-	const struct ftrace_pointer *fp = m->private;
 	struct ftrace_branch_data *p = v;
 	const char *f;
 	long percent;
 
-	if (v == (void *)1) {
-		if (fp->hit)
-			seq_printf(m, "   miss      hit    %% ");
-		else
-			seq_printf(m, " correct incorrect  %% ");
-		seq_printf(m, "       Function                "
-			      "  File              Line\n"
-			      " ------- ---------  - "
-			      "       --------                "
-			      "  ----              ----\n");
-		return 0;
-	}
-
 	/* Only print the file, not the path */
 	f = p->file + strlen(p->file);
 	while (f >= p->file && *f != '/')
@@ -252,11 +258,7 @@ static int t_show(struct seq_file *m, void *v)
 	/*
 	 * The miss is overlayed on correct, and hit on incorrect.
 	 */
-	if (p->correct) {
-		percent = p->incorrect * 100;
-		percent /= p->correct + p->incorrect;
-	} else
-		percent = p->incorrect ? 100 : -1;
+	percent = get_incorrect_percent(p);
 
 	seq_printf(m, "%8lu %8lu ",  p->correct, p->incorrect);
 	if (percent < 0)
@@ -267,76 +269,130 @@ static int t_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations tracing_likely_seq_ops = {
-	.start		= t_start,
-	.next		= t_next,
-	.stop		= t_stop,
-	.show		= t_show,
-};
+static void *annotated_branch_stat_start(void)
+{
+	return __start_annotated_branch_profile;
+}
 
-static int tracing_branch_open(struct inode *inode, struct file *file)
+static void *
+annotated_branch_stat_next(void *v, int idx)
 {
-	int ret;
+	struct ftrace_branch_data *p = v;
 
-	ret = seq_open(file, &tracing_likely_seq_ops);
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-		m->private = (void *)inode->i_private;
-	}
+	++p;
 
-	return ret;
+	if ((void *)p >= (void *)__stop_annotated_branch_profile)
+		return NULL;
+
+	return p;
 }
 
-static const struct file_operations tracing_branch_fops = {
-	.open		= tracing_branch_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-};
+static int annotated_branch_stat_cmp(void *p1, void *p2)
+{
+	struct ftrace_branch_data *a = p1;
+	struct ftrace_branch_data *b = p2;
+
+	long percent_a, percent_b;
+
+	percent_a = get_incorrect_percent(a);
+	percent_b = get_incorrect_percent(b);
+
+	if (percent_a < percent_b)
+		return -1;
+	if (percent_a > percent_b)
+		return 1;
+	else
+		return 0;
+}
 
 #ifdef CONFIG_PROFILE_ALL_BRANCHES
+
 extern unsigned long __start_branch_profile[];
 extern unsigned long __stop_branch_profile[];
 
-static const struct ftrace_pointer ftrace_branch_pos = {
-	.start			= __start_branch_profile,
-	.stop			= __stop_branch_profile,
-	.hit			= 1,
-};
+static int all_branch_stat_headers(struct seq_file *m)
+{
+	seq_printf(m, "   miss      hit    %% ");
+	seq_printf(m, "       Function                "
+			      "  File              Line\n"
+			      " ------- ---------  - "
+			      "       --------                "
+			      "  ----              ----\n");
+	return 0;
+}
 
-#endif /* CONFIG_PROFILE_ALL_BRANCHES */
+static void *all_branch_stat_start(void)
+{
+	return __start_branch_profile;
+}
 
-extern unsigned long __start_annotated_branch_profile[];
-extern unsigned long __stop_annotated_branch_profile[];
+static void *
+all_branch_stat_next(void *v, int idx)
+{
+	struct ftrace_branch_data *p = v;
 
-static const struct ftrace_pointer ftrace_annotated_branch_pos = {
-	.start			= __start_annotated_branch_profile,
-	.stop			= __stop_annotated_branch_profile,
-};
+	++p;
 
-static __init int ftrace_branch_init(void)
-{
-	struct dentry *d_tracer;
-	struct dentry *entry;
+	if ((void *)p >= (void *)__stop_branch_profile)
+		return NULL;
 
-	d_tracer = tracing_init_dentry();
+	return p;
+}
 
-	entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer,
-				    (void *)&ftrace_annotated_branch_pos,
-				    &tracing_branch_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'profile_annotatet_branch' entry\n");
+static struct tracer_stat branch_stats[] = {
+	{.name = "annotated",
+	.stat_start = annotated_branch_stat_start,
+	.stat_next = annotated_branch_stat_next,
+	.stat_cmp = annotated_branch_stat_cmp,
+	.stat_headers = annotated_branch_stat_headers,
+	.stat_show = branch_stat_show},
+
+	{.name = "all",
+	.stat_start = all_branch_stat_start,
+	.stat_next = all_branch_stat_next,
+	.stat_headers = all_branch_stat_headers,
+	.stat_show = branch_stat_show},
+
+	{ }
+};
+#else
+static struct tracer_stat branch_stats[] = {
+	{.name = "annotated",
+	.stat_start = annotated_branch_stat_start,
+	.stat_next = annotated_branch_stat_next,
+	.stat_cmp = annotated_branch_stat_cmp,
+	.stat_headers = annotated_branch_stat_headers,
+	.stat_show = branch_stat_show},
+
+	{ }
+};
+#endif /* CONFIG_PROFILE_ALL_BRANCHES */
 
-#ifdef CONFIG_PROFILE_ALL_BRANCHES
-	entry = debugfs_create_file("profile_branch", 0444, d_tracer,
-				    (void *)&ftrace_branch_pos,
-				    &tracing_branch_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs"
-			   " 'profile_branch' entry\n");
+
+static struct tracer branch_trace __read_mostly =
+{
+	.name		= "branch",
+#ifdef CONFIG_BRANCH_TRACER
+	.init		= branch_trace_init,
+	.reset		= branch_trace_reset,
+#ifdef CONFIG_FTRACE_SELFTEST
+	.selftest	= trace_selftest_startup_branch,
+#endif /* CONFIG_FTRACE_SELFTEST */
 #endif
+	.stats		= branch_stats
+};
 
-	return 0;
-}
+__init static int init_branch_trace(void)
+{
+#ifdef CONFIG_BRANCH_TRACER
+	int ret;
+	ret = register_ftrace_event(&trace_branch_event);
+	if (!ret) {
+		printk(KERN_WARNING "Warning: could not register branch events\n");
+		return 1;
+	}
+#endif
 
-device_initcall(ftrace_branch_init);
+	return register_tracer(&branch_trace);
+}
+device_initcall(init_branch_trace);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 930c08e..3c54598 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -12,6 +12,7 @@
 #include <linux/fs.h>
 
 #include "trace.h"
+#include "trace_output.h"
 
 #define TRACE_GRAPH_INDENT	2
 
@@ -589,8 +590,11 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
-	if (ent->flags & TRACE_FLAG_CONT)
-		trace_seq_print_cont(s, iter);
+	/* Strip ending newline */
+	if (s->buffer[s->len - 1] == '\n') {
+		s->buffer[s->len - 1] = '\0';
+		s->len--;
+	}
 
 	ret = trace_seq_printf(s, " */\n");
 	if (!ret)
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 649df22..df21c1e 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -14,6 +14,7 @@
 #include <asm/ds.h>
 
 #include "trace.h"
+#include "trace_output.h"
 
 
 #define SIZEOF_BTS (1 << 13)
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index fffcb06..621c8c3 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,8 +9,10 @@
 #include <linux/kernel.h>
 #include <linux/mmiotrace.h>
 #include <linux/pci.h>
+#include <asm/atomic.h>
 
 #include "trace.h"
+#include "trace_output.h"
 
 struct header_iter {
 	struct pci_dev *dev;
@@ -19,6 +21,7 @@ struct header_iter {
 static struct trace_array *mmio_trace_array;
 static bool overrun_detected;
 static unsigned long prev_overruns;
+static atomic_t dropped_count;
 
 static void mmio_reset_data(struct trace_array *tr)
 {
@@ -121,11 +124,11 @@ static void mmio_close(struct trace_iterator *iter)
 
 static unsigned long count_overruns(struct trace_iterator *iter)
 {
-	unsigned long cnt = 0;
+	unsigned long cnt = atomic_xchg(&dropped_count, 0);
 	unsigned long over = ring_buffer_overruns(iter->tr->buffer);
 
 	if (over > prev_overruns)
-		cnt = over - prev_overruns;
+		cnt += over - prev_overruns;
 	prev_overruns = over;
 	return cnt;
 }
@@ -262,9 +265,6 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
-	if (entry->flags & TRACE_FLAG_CONT)
-		trace_seq_print_cont(s, iter);
-
 	return TRACE_TYPE_HANDLED;
 }
 
@@ -310,8 +310,10 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
 
 	event	= ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
 					   &irq_flags);
-	if (!event)
+	if (!event) {
+		atomic_inc(&dropped_count);
 		return;
+	}
 	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(&entry->ent, 0, preempt_count());
 	entry->ent.type			= TRACE_MMIO_RW;
@@ -338,8 +340,10 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
 
 	event	= ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
 					   &irq_flags);
-	if (!event)
+	if (!event) {
+		atomic_inc(&dropped_count);
 		return;
+	}
 	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(&entry->ent, 0, preempt_count());
 	entry->ent.type			= TRACE_MMIO_MAP;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
new file mode 100644
index 0000000..df0c25c
--- /dev/null
+++ b/kernel/trace/trace_output.c
@@ -0,0 +1,832 @@
+/*
+ * trace_output.c
+ *
+ * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/ftrace.h>
+
+#include "trace_output.h"
+
+/* must be a power of 2 */
+#define EVENT_HASHSIZE	128
+
+static DEFINE_MUTEX(trace_event_mutex);
+static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
+
+static int next_event_type = __TRACE_LAST_TYPE + 1;
+
+/**
+ * trace_seq_printf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ *
+ * The tracer may use either sequence operations or its own
+ * copy to user routines. To simplify formating of a trace
+ * trace_seq_printf is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ */
+int
+trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+{
+	int len = (PAGE_SIZE - 1) - s->len;
+	va_list ap;
+	int ret;
+
+	if (!len)
+		return 0;
+
+	va_start(ap, fmt);
+	ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
+	va_end(ap);
+
+	/* If we can't write it all, don't bother writing anything */
+	if (ret >= len)
+		return 0;
+
+	s->len += ret;
+
+	return len;
+}
+
+/**
+ * trace_seq_puts - trace sequence printing of simple string
+ * @s: trace sequence descriptor
+ * @str: simple string to record
+ *
+ * The tracer may use either the sequence operations or its own
+ * copy to user routines. This function records a simple string
+ * into a special buffer (@s) for later retrieval by a sequencer
+ * or other mechanism.
+ */
+int trace_seq_puts(struct trace_seq *s, const char *str)
+{
+	int len = strlen(str);
+
+	if (len > ((PAGE_SIZE - 1) - s->len))
+		return 0;
+
+	memcpy(s->buffer + s->len, str, len);
+	s->len += len;
+
+	return len;
+}
+
+int trace_seq_putc(struct trace_seq *s, unsigned char c)
+{
+	if (s->len >= (PAGE_SIZE - 1))
+		return 0;
+
+	s->buffer[s->len++] = c;
+
+	return 1;
+}
+
+int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
+{
+	if (len > ((PAGE_SIZE - 1) - s->len))
+		return 0;
+
+	memcpy(s->buffer + s->len, mem, len);
+	s->len += len;
+
+	return len;
+}
+
+int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
+{
+	unsigned char hex[HEX_CHARS];
+	unsigned char *data = mem;
+	int i, j;
+
+#ifdef __BIG_ENDIAN
+	for (i = 0, j = 0; i < len; i++) {
+#else
+	for (i = len-1, j = 0; i >= 0; i--) {
+#endif
+		hex[j++] = hex_asc_hi(data[i]);
+		hex[j++] = hex_asc_lo(data[i]);
+	}
+	hex[j++] = ' ';
+
+	return trace_seq_putmem(s, hex, j);
+}
+
+int trace_seq_path(struct trace_seq *s, struct path *path)
+{
+	unsigned char *p;
+
+	if (s->len >= (PAGE_SIZE - 1))
+		return 0;
+	p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
+	if (!IS_ERR(p)) {
+		p = mangle_path(s->buffer + s->len, p, "\n");
+		if (p) {
+			s->len = p - s->buffer;
+			return 1;
+		}
+	} else {
+		s->buffer[s->len++] = '?';
+		return 1;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_KRETPROBES
+static inline const char *kretprobed(const char *name)
+{
+	static const char tramp_name[] = "kretprobe_trampoline";
+	int size = sizeof(tramp_name);
+
+	if (strncmp(tramp_name, name, size) == 0)
+		return "[unknown/kretprobe'd]";
+	return name;
+}
+#else
+static inline const char *kretprobed(const char *name)
+{
+	return name;
+}
+#endif /* CONFIG_KRETPROBES */
+
+static int
+seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
+{
+#ifdef CONFIG_KALLSYMS
+	char str[KSYM_SYMBOL_LEN];
+	const char *name;
+
+	kallsyms_lookup(address, NULL, NULL, NULL, str);
+
+	name = kretprobed(str);
+
+	return trace_seq_printf(s, fmt, name);
+#endif
+	return 1;
+}
+
+static int
+seq_print_sym_offset(struct trace_seq *s, const char *fmt,
+		     unsigned long address)
+{
+#ifdef CONFIG_KALLSYMS
+	char str[KSYM_SYMBOL_LEN];
+	const char *name;
+
+	sprint_symbol(str, address);
+	name = kretprobed(str);
+
+	return trace_seq_printf(s, fmt, name);
+#endif
+	return 1;
+}
+
+#ifndef CONFIG_64BIT
+# define IP_FMT "%08lx"
+#else
+# define IP_FMT "%016lx"
+#endif
+
+int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
+		      unsigned long ip, unsigned long sym_flags)
+{
+	struct file *file = NULL;
+	unsigned long vmstart = 0;
+	int ret = 1;
+
+	if (mm) {
+		const struct vm_area_struct *vma;
+
+		down_read(&mm->mmap_sem);
+		vma = find_vma(mm, ip);
+		if (vma) {
+			file = vma->vm_file;
+			vmstart = vma->vm_start;
+		}
+		if (file) {
+			ret = trace_seq_path(s, &file->f_path);
+			if (ret)
+				ret = trace_seq_printf(s, "[+0x%lx]",
+						       ip - vmstart);
+		}
+		up_read(&mm->mmap_sem);
+	}
+	if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
+		ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
+	return ret;
+}
+
+int
+seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
+		      unsigned long sym_flags)
+{
+	struct mm_struct *mm = NULL;
+	int ret = 1;
+	unsigned int i;
+
+	if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
+		struct task_struct *task;
+		/*
+		 * we do the lookup on the thread group leader,
+		 * since individual threads might have already quit!
+		 */
+		rcu_read_lock();
+		task = find_task_by_vpid(entry->ent.tgid);
+		if (task)
+			mm = get_task_mm(task);
+		rcu_read_unlock();
+	}
+
+	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+		unsigned long ip = entry->caller[i];
+
+		if (ip == ULONG_MAX || !ret)
+			break;
+		if (i && ret)
+			ret = trace_seq_puts(s, " <- ");
+		if (!ip) {
+			if (ret)
+				ret = trace_seq_puts(s, "??");
+			continue;
+		}
+		if (!ret)
+			break;
+		if (ret)
+			ret = seq_print_user_ip(s, mm, ip, sym_flags);
+	}
+
+	if (mm)
+		mmput(mm);
+	return ret;
+}
+
+int
+seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
+{
+	int ret;
+
+	if (!ip)
+		return trace_seq_printf(s, "0");
+
+	if (sym_flags & TRACE_ITER_SYM_OFFSET)
+		ret = seq_print_sym_offset(s, "%s", ip);
+	else
+		ret = seq_print_sym_short(s, "%s", ip);
+
+	if (!ret)
+		return 0;
+
+	if (sym_flags & TRACE_ITER_SYM_ADDR)
+		ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
+	return ret;
+}
+
+static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
+
+static int task_state_char(unsigned long state)
+{
+	int bit = state ? __ffs(state) + 1 : 0;
+
+	return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
+}
+
+/**
+ * ftrace_find_event - find a registered event
+ * @type: the type of event to look for
+ *
+ * Returns an event of type @type otherwise NULL
+ */
+struct trace_event *ftrace_find_event(int type)
+{
+	struct trace_event *event;
+	struct hlist_node *n;
+	unsigned key;
+
+	key = type & (EVENT_HASHSIZE - 1);
+
+	hlist_for_each_entry_rcu(event, n, &event_hash[key], node) {
+		if (event->type == type)
+			return event;
+	}
+
+	return NULL;
+}
+
+/**
+ * register_ftrace_event - register output for an event type
+ * @event: the event type to register
+ *
+ * Event types are stored in a hash and this hash is used to
+ * find a way to print an event. If the @event->type is set
+ * then it will use that type, otherwise it will assign a
+ * type to use.
+ *
+ * If you assign your own type, please make sure it is added
+ * to the trace_type enum in trace.h, to avoid collisions
+ * with the dynamic types.
+ *
+ * Returns the event type number or zero on error.
+ */
+int register_ftrace_event(struct trace_event *event)
+{
+	unsigned key;
+	int ret = 0;
+
+	mutex_lock(&trace_event_mutex);
+
+	if (!event->type)
+		event->type = next_event_type++;
+	else if (event->type > __TRACE_LAST_TYPE) {
+		printk(KERN_WARNING "Need to add type to trace.h\n");
+		WARN_ON(1);
+	}
+
+	if (ftrace_find_event(event->type))
+		goto out;
+
+	key = event->type & (EVENT_HASHSIZE - 1);
+
+	hlist_add_head_rcu(&event->node, &event_hash[key]);
+
+	ret = event->type;
+ out:
+	mutex_unlock(&trace_event_mutex);
+
+	return ret;
+}
+
+/**
+ * unregister_ftrace_event - remove a no longer used event
+ * @event: the event to remove
+ */
+int unregister_ftrace_event(struct trace_event *event)
+{
+	mutex_lock(&trace_event_mutex);
+	hlist_del(&event->node);
+	mutex_unlock(&trace_event_mutex);
+
+	return 0;
+}
+
+/*
+ * Standard events
+ */
+
+int
+trace_nop_print(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	return 0;
+}
+
+/* TRACE_FN */
+static int
+trace_fn_latency(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	struct ftrace_entry *field;
+
+	trace_assign_type(field, entry);
+
+	if (!seq_print_ip_sym(s, field->ip, flags))
+		goto partial;
+	if (!trace_seq_puts(s, " ("))
+		goto partial;
+	if (!seq_print_ip_sym(s, field->parent_ip, flags))
+		goto partial;
+	if (!trace_seq_puts(s, ")\n"))
+		goto partial;
+
+	return 0;
+
+ partial:
+	return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static int
+trace_fn_trace(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	struct ftrace_entry *field;
+
+	trace_assign_type(field, entry);
+
+	if (!seq_print_ip_sym(s, field->ip, flags))
+		goto partial;
+
+	if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) {
+		if (!trace_seq_printf(s, " <-"))
+			goto partial;
+		if (!seq_print_ip_sym(s,
+				      field->parent_ip,
+				      flags))
+			goto partial;
+	}
+	if (!trace_seq_printf(s, "\n"))
+		goto partial;
+
+	return 0;
+
+ partial:
+	return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static int
+trace_fn_raw(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	struct ftrace_entry *field;
+
+	trace_assign_type(field, entry);
+
+	if (trace_seq_printf(s, "%x %x\n",
+			     field->ip,
+			     field->parent_ip))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return 0;
+}
+
+static int
+trace_fn_hex(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	struct ftrace_entry *field;
+
+	trace_assign_type(field, entry);
+
+	SEQ_PUT_HEX_FIELD_RET(s, field->ip);
+	SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
+
+	return 0;
+}
+
+static int
+trace_fn_bin(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	struct ftrace_entry *field;
+
+	trace_assign_type(field, entry);
+
+	SEQ_PUT_FIELD_RET(s, field->ip);
+	SEQ_PUT_FIELD_RET(s, field->parent_ip);
+
+	return 0;
+}
+
+static struct trace_event trace_fn_event = {
+	.type	 	= TRACE_FN,
+	.trace		= trace_fn_trace,
+	.latency_trace	= trace_fn_latency,
+	.raw		= trace_fn_raw,
+	.hex		= trace_fn_hex,
+	.binary		= trace_fn_bin,
+};
+
+/* TRACE_CTX an TRACE_WAKE */
+static int
+trace_ctxwake_print(struct trace_seq *s, struct trace_entry *entry, int flags,
+		    char *delim)
+{
+	struct ctx_switch_entry *field;
+	char *comm;
+	int S, T;
+
+	trace_assign_type(field, entry);
+
+	T = task_state_char(field->next_state);
+	S = task_state_char(field->prev_state);
+	comm = trace_find_cmdline(field->next_pid);
+	if (trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
+			     field->prev_pid,
+			     field->prev_prio,
+			     S, delim,
+			     field->next_cpu,
+			     field->next_pid,
+			     field->next_prio,
+			     T, comm))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return 0;
+}
+
+static int
+trace_ctx_print(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	return trace_ctxwake_print(s, entry, flags, "==>");
+}
+
+static int
+trace_wake_print(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	return trace_ctxwake_print(s, entry, flags, "  +");
+}
+
+static int
+trace_ctxwake_raw(struct trace_seq *s, struct trace_entry *entry, int flags,
+		  char S)
+{
+	struct ctx_switch_entry *field;
+	int T;
+
+	trace_assign_type(field, entry);
+
+	if (!S)
+		task_state_char(field->prev_state);
+	T = task_state_char(field->next_state);
+	if (trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
+			     field->prev_pid,
+			     field->prev_prio,
+			     S,
+			     field->next_cpu,
+			     field->next_pid,
+			     field->next_prio,
+			     T))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return 0;
+}
+
+static int
+trace_ctx_raw(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	return trace_ctxwake_raw(s, entry, flags, 0);
+}
+
+static int
+trace_wake_raw(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	return trace_ctxwake_raw(s, entry, flags, '+');
+}
+
+
+static int
+trace_ctxwake_hex(struct trace_seq *s, struct trace_entry *entry, int flags,
+		  char S)
+{
+	struct ctx_switch_entry *field;
+	int T;
+
+	trace_assign_type(field, entry);
+
+	if (!S)
+		task_state_char(field->prev_state);
+	T = task_state_char(field->next_state);
+
+	SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
+	SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
+	SEQ_PUT_HEX_FIELD_RET(s, S);
+	SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
+	SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
+	SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
+	SEQ_PUT_HEX_FIELD_RET(s, T);
+
+	return 0;
+}
+
+static int
+trace_ctx_hex(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	return trace_ctxwake_hex(s, entry, flags, 0);
+}
+
+static int
+trace_wake_hex(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	return trace_ctxwake_hex(s, entry, flags, '+');
+}
+
+static int
+trace_ctxwake_bin(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	struct ctx_switch_entry *field;
+
+	trace_assign_type(field, entry);
+
+	SEQ_PUT_FIELD_RET(s, field->prev_pid);
+	SEQ_PUT_FIELD_RET(s, field->prev_prio);
+	SEQ_PUT_FIELD_RET(s, field->prev_state);
+	SEQ_PUT_FIELD_RET(s, field->next_pid);
+	SEQ_PUT_FIELD_RET(s, field->next_prio);
+	SEQ_PUT_FIELD_RET(s, field->next_state);
+
+	return 0;
+}
+
+static struct trace_event trace_ctx_event = {
+	.type	 	= TRACE_CTX,
+	.trace		= trace_ctx_print,
+	.latency_trace	= trace_ctx_print,
+	.raw		= trace_ctx_raw,
+	.hex		= trace_ctx_hex,
+	.binary		= trace_ctxwake_bin,
+};
+
+static struct trace_event trace_wake_event = {
+	.type	 	= TRACE_WAKE,
+	.trace		= trace_wake_print,
+	.latency_trace	= trace_wake_print,
+	.raw		= trace_wake_raw,
+	.hex		= trace_wake_hex,
+	.binary		= trace_ctxwake_bin,
+};
+
+/* TRACE_SPECIAL */
+static int
+trace_special_print(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	struct special_entry *field;
+
+	trace_assign_type(field, entry);
+
+	if (trace_seq_printf(s, "# %ld %ld %ld\n",
+			     field->arg1,
+			     field->arg2,
+			     field->arg3))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return 0;
+}
+
+static int
+trace_special_hex(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	struct special_entry *field;
+
+	trace_assign_type(field, entry);
+
+	SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
+	SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
+	SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
+
+	return 0;
+}
+
+static int
+trace_special_bin(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	struct special_entry *field;
+
+	trace_assign_type(field, entry);
+
+	SEQ_PUT_FIELD_RET(s, field->arg1);
+	SEQ_PUT_FIELD_RET(s, field->arg2);
+	SEQ_PUT_FIELD_RET(s, field->arg3);
+
+	return 0;
+}
+
+static struct trace_event trace_special_event = {
+	.type	 	= TRACE_SPECIAL,
+	.trace		= trace_special_print,
+	.latency_trace	= trace_special_print,
+	.raw		= trace_special_print,
+	.hex		= trace_special_hex,
+	.binary		= trace_special_bin,
+};
+
+/* TRACE_STACK */
+
+static int
+trace_stack_print(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	struct stack_entry *field;
+	int i;
+
+	trace_assign_type(field, entry);
+
+	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+		if (i) {
+			if (trace_seq_puts(s, " <= "))
+				goto partial;
+
+			if (seq_print_ip_sym(s, field->caller[i], flags))
+				goto partial;
+		}
+		if (trace_seq_puts(s, "\n"))
+			goto partial;
+	}
+
+	return 0;
+
+ partial:
+	return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static struct trace_event trace_stack_event = {
+	.type	 	= TRACE_STACK,
+	.trace		= trace_stack_print,
+	.latency_trace	= trace_stack_print,
+	.raw		= trace_special_print,
+	.hex		= trace_special_hex,
+	.binary		= trace_special_bin,
+};
+
+/* TRACE_USER_STACK */
+static int
+trace_user_stack_print(struct trace_seq *s, struct trace_entry *entry,
+		       int flags)
+{
+	struct userstack_entry *field;
+
+	trace_assign_type(field, entry);
+
+	if (seq_print_userip_objs(field, s, flags))
+		goto partial;
+
+	if (trace_seq_putc(s, '\n'))
+		goto partial;
+
+	return 0;
+
+ partial:
+	return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static struct trace_event trace_user_stack_event = {
+	.type	 	= TRACE_USER_STACK,
+	.trace		= trace_user_stack_print,
+	.latency_trace	= trace_user_stack_print,
+	.raw		= trace_special_print,
+	.hex		= trace_special_hex,
+	.binary		= trace_special_bin,
+};
+
+/* TRACE_PRINT */
+static int
+trace_print_print(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	struct print_entry *field;
+
+	trace_assign_type(field, entry);
+
+	if (seq_print_ip_sym(s, field->ip, flags))
+		goto partial;
+
+	if (trace_seq_printf(s, ": %s", field->buf))
+		goto partial;
+
+	return 0;
+
+ partial:
+	return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static int
+trace_print_raw(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+	struct print_entry *field;
+
+	trace_assign_type(field, entry);
+
+	if (seq_print_ip_sym(s, field->ip, flags))
+		goto partial;
+
+	if (trace_seq_printf(s, "# %lx %s", field->ip, field->buf))
+		goto partial;
+
+	return 0;
+
+ partial:
+	return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static struct trace_event trace_print_event = {
+	.type	 	= TRACE_PRINT,
+	.trace		= trace_print_print,
+	.latency_trace	= trace_print_print,
+	.raw		= trace_print_raw,
+	.hex		= trace_nop_print,
+	.binary		= trace_nop_print,
+};
+
+static struct trace_event *events[] __initdata = {
+	&trace_fn_event,
+	&trace_ctx_event,
+	&trace_wake_event,
+	&trace_special_event,
+	&trace_stack_event,
+	&trace_user_stack_event,
+	&trace_print_event,
+	NULL
+};
+
+__init static int init_events(void)
+{
+	struct trace_event *event;
+	int i, ret;
+
+	for (i = 0; events[i]; i++) {
+		event = events[i];
+
+		ret = register_ftrace_event(event);
+		if (!ret) {
+			printk(KERN_WARNING "event %d failed to register\n",
+			       event->type);
+			WARN_ON_ONCE(1);
+		}
+	}
+
+	return 0;
+}
+device_initcall(init_events);
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
new file mode 100644
index 0000000..ecab4ea
--- /dev/null
+++ b/kernel/trace/trace_output.h
@@ -0,0 +1,59 @@
+#ifndef __TRACE_EVENTS_H
+#define __TRACE_EVENTS_H
+
+#include "trace.h"
+
+typedef int (*trace_print_func)(struct trace_seq *s, struct trace_entry *entry,
+				int flags);
+
+struct trace_event {
+	struct hlist_node	node;
+	int			type;
+	trace_print_func	trace;
+	trace_print_func	latency_trace;
+	trace_print_func	raw;
+	trace_print_func	hex;
+	trace_print_func	binary;
+};
+
+extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
+extern int
+seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
+		unsigned long sym_flags);
+extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
+				 size_t cnt);
+int trace_seq_puts(struct trace_seq *s, const char *str);
+int trace_seq_putc(struct trace_seq *s, unsigned char c);
+int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len);
+int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len);
+int trace_seq_path(struct trace_seq *s, struct path *path);
+int seq_print_userip_objs(const struct userstack_entry *entry,
+			  struct trace_seq *s, unsigned long sym_flags);
+int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
+		      unsigned long ip, unsigned long sym_flags);
+
+struct trace_event *ftrace_find_event(int type);
+int register_ftrace_event(struct trace_event *event);
+int unregister_ftrace_event(struct trace_event *event);
+
+int
+trace_nop_print(struct trace_seq *s, struct trace_entry *entry, int flags);
+
+#define MAX_MEMHEX_BYTES	8
+#define HEX_CHARS		(MAX_MEMHEX_BYTES*2 + 1)
+
+#define SEQ_PUT_FIELD_RET(s, x)				\
+do {							\
+	if (!trace_seq_putmem(s, &(x), sizeof(x)))	\
+		return 0;				\
+} while (0)
+
+#define SEQ_PUT_HEX_FIELD_RET(s, x)			\
+do {							\
+	BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES);	\
+	if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))	\
+		return 0;				\
+} while (0)
+
+#endif
+
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 7bda248..faa6ab7 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 
 #include "trace.h"
+#include "trace_output.h"
 
 static struct trace_array *power_trace;
 static int __read_mostly trace_power_enabled;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 88c8eb7..5013812 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -9,7 +9,6 @@ static inline int trace_valid_entry(struct trace_entry *entry)
 	case TRACE_FN:
 	case TRACE_CTX:
 	case TRACE_WAKE:
-	case TRACE_CONT:
 	case TRACE_STACK:
 	case TRACE_PRINT:
 	case TRACE_SPECIAL:
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
new file mode 100644
index 0000000..1515f9e
--- /dev/null
+++ b/kernel/trace/trace_stat.c
@@ -0,0 +1,351 @@
+/*
+ * Infrastructure for statistic tracing (histogram output).
+ *
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Based on the code from trace_branch.c which is
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+
+#include <linux/list.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include "trace.h"
+
+
+/* List of stat entries from a tracer */
+struct trace_stat_list {
+	struct list_head list;
+	void *stat;
+};
+
+/* A stat session is the stats output in one file */
+struct tracer_stat_session {
+	struct tracer_stat *ts;
+	struct list_head stat_list;
+	struct mutex stat_mutex;
+};
+
+/* All of the sessions currently in use. Each stat file embeed one session */
+static struct tracer_stat_session **all_stat_sessions;
+static int nb_sessions;
+static struct dentry *stat_dir, **stat_files;
+
+
+static void reset_stat_session(struct tracer_stat_session *session)
+{
+	struct trace_stat_list *node, *next;
+
+	list_for_each_entry_safe(node, next, &session->stat_list, list)
+		kfree(node);
+
+	INIT_LIST_HEAD(&session->stat_list);
+}
+
+/* Called when a tracer is initialized */
+static int init_all_sessions(int nb, struct tracer_stat *ts)
+{
+	int i, j;
+	struct tracer_stat_session *session;
+
+	nb_sessions = 0;
+
+	if (all_stat_sessions) {
+		for (i = 0; i < nb_sessions; i++) {
+			session = all_stat_sessions[i];
+			reset_stat_session(session);
+			mutex_destroy(&session->stat_mutex);
+			kfree(session);
+		}
+	}
+	all_stat_sessions = kmalloc(sizeof(struct tracer_stat_session *) * nb,
+				    GFP_KERNEL);
+	if (!all_stat_sessions)
+		return -ENOMEM;
+
+	for (i = 0; i < nb; i++) {
+		session = kmalloc(sizeof(struct tracer_stat_session) * nb,
+				  GFP_KERNEL);
+		if (!session)
+			goto free_sessions;
+
+		INIT_LIST_HEAD(&session->stat_list);
+		mutex_init(&session->stat_mutex);
+		session->ts = &ts[i];
+		all_stat_sessions[i] = session;
+	}
+	nb_sessions = nb;
+	return 0;
+
+free_sessions:
+
+	for (j = 0; j < i; j++)
+		kfree(all_stat_sessions[i]);
+
+	kfree(all_stat_sessions);
+	all_stat_sessions = NULL;
+
+	return -ENOMEM;
+}
+
+static int basic_tracer_stat_checks(struct tracer_stat *ts)
+{
+	int i;
+
+	if (!ts)
+		return 0;
+
+	for (i = 0; ts[i].name; i++) {
+		if (!ts[i].stat_start || !ts[i].stat_next || !ts[i].stat_show)
+			return -EBUSY;
+	}
+	return i;
+}
+
+/*
+ * For tracers that don't provide a stat_cmp callback.
+ * This one will force an immediate insertion on tail of
+ * the list.
+ */
+static int dummy_cmp(void *p1, void *p2)
+{
+	return 1;
+}
+
+/*
+ * Initialize the stat list at each trace_stat file opening.
+ * All of these copies and sorting are required on all opening
+ * since the stats could have changed between two file sessions.
+ */
+static int stat_seq_init(struct tracer_stat_session *session)
+{
+	struct trace_stat_list *iter_entry, *new_entry;
+	struct tracer_stat *ts = session->ts;
+	void *prev_stat;
+	int ret = 0;
+	int i;
+
+	mutex_lock(&session->stat_mutex);
+	reset_stat_session(session);
+
+	if (!ts->stat_cmp)
+		ts->stat_cmp = dummy_cmp;
+
+	/*
+	 * The first entry. Actually this is the second, but the first
+	 * one (the stat_list head) is pointless.
+	 */
+	new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
+	if (!new_entry) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	INIT_LIST_HEAD(&new_entry->list);
+
+	list_add(&new_entry->list, &session->stat_list);
+
+	new_entry->stat = ts->stat_start();
+	prev_stat = new_entry->stat;
+
+	/*
+	 * Iterate over the tracer stat entries and store them in a sorted
+	 * list.
+	 */
+	for (i = 1; ; i++) {
+		new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
+		if (!new_entry) {
+			ret = -ENOMEM;
+			goto exit_free_list;
+		}
+
+		INIT_LIST_HEAD(&new_entry->list);
+		new_entry->stat = ts->stat_next(prev_stat, i);
+
+		/* End of insertion */
+		if (!new_entry->stat)
+			break;
+
+		list_for_each_entry(iter_entry, &session->stat_list, list) {
+
+			/* Insertion with a descendent sorting */
+			if (ts->stat_cmp(new_entry->stat,
+						iter_entry->stat) > 0) {
+
+				list_add_tail(&new_entry->list,
+						&iter_entry->list);
+				break;
+
+			/* The current smaller value */
+			} else if (list_is_last(&iter_entry->list,
+						&session->stat_list)) {
+				list_add(&new_entry->list, &iter_entry->list);
+				break;
+			}
+		}
+
+		prev_stat = new_entry->stat;
+	}
+exit:
+	mutex_unlock(&session->stat_mutex);
+	return ret;
+
+exit_free_list:
+	reset_stat_session(session);
+	mutex_unlock(&session->stat_mutex);
+	return ret;
+}
+
+
+static void *stat_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct tracer_stat_session *session = s->private;
+
+	/* Prevent from tracer switch or stat_list modification */
+	mutex_lock(&session->stat_mutex);
+
+	/* If we are in the beginning of the file, print the headers */
+	if (!*pos && session->ts->stat_headers)
+		session->ts->stat_headers(s);
+
+	return seq_list_start(&session->stat_list, *pos);
+}
+
+static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
+{
+	struct tracer_stat_session *session = s->private;
+
+	return seq_list_next(p, &session->stat_list, pos);
+}
+
+static void stat_seq_stop(struct seq_file *s, void *p)
+{
+	struct tracer_stat_session *session = s->private;
+	mutex_unlock(&session->stat_mutex);
+}
+
+static int stat_seq_show(struct seq_file *s, void *v)
+{
+	struct tracer_stat_session *session = s->private;
+	struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
+
+	return session->ts->stat_show(s, l->stat);
+}
+
+static const struct seq_operations trace_stat_seq_ops = {
+	.start = stat_seq_start,
+	.next = stat_seq_next,
+	.stop = stat_seq_stop,
+	.show = stat_seq_show
+};
+
+/* The session stat is refilled and resorted at each stat file opening */
+static int tracing_stat_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	struct tracer_stat_session *session = inode->i_private;
+
+	ret = seq_open(file, &trace_stat_seq_ops);
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		m->private = session;
+		ret = stat_seq_init(session);
+	}
+
+	return ret;
+}
+
+
+/*
+ * Avoid consuming memory with our now useless list.
+ */
+static int tracing_stat_release(struct inode *i, struct file *f)
+{
+	struct tracer_stat_session *session = i->i_private;
+
+	mutex_lock(&session->stat_mutex);
+	reset_stat_session(session);
+	mutex_unlock(&session->stat_mutex);
+
+	return 0;
+}
+
+static const struct file_operations tracing_stat_fops = {
+	.open		= tracing_stat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= tracing_stat_release
+};
+
+
+static void destroy_trace_stat_files(void)
+{
+	int i;
+
+	if (stat_files) {
+		for (i = 0; i < nb_sessions; i++)
+			debugfs_remove(stat_files[i]);
+		kfree(stat_files);
+		stat_files = NULL;
+	}
+}
+
+static void init_trace_stat_files(void)
+{
+	int i;
+
+	if (!stat_dir || !nb_sessions)
+		return;
+
+	stat_files = kmalloc(sizeof(struct dentry *) * nb_sessions, GFP_KERNEL);
+
+	if (!stat_files) {
+		pr_warning("trace stat: not enough memory\n");
+		return;
+	}
+
+	for (i = 0; i < nb_sessions; i++) {
+		struct tracer_stat_session *session = all_stat_sessions[i];
+		stat_files[i] = debugfs_create_file(session->ts->name, 0644,
+						stat_dir,
+						session, &tracing_stat_fops);
+		if (!stat_files[i])
+			pr_warning("cannot create %s entry\n",
+				   session->ts->name);
+	}
+}
+
+void init_tracer_stat(struct tracer *trace)
+{
+	int nb = basic_tracer_stat_checks(trace->stats);
+
+	destroy_trace_stat_files();
+
+	if (nb < 0) {
+		pr_warning("stat tracing: missing stat callback on %s\n",
+			   trace->name);
+		return;
+	}
+	if (!nb)
+		return;
+
+	init_all_sessions(nb, trace->stats);
+	init_trace_stat_files();
+}
+
+static int __init tracing_stat_init(void)
+{
+	struct dentry *d_tracing;
+
+	d_tracing = tracing_init_dentry();
+
+	stat_dir = debugfs_create_dir("trace_stat", d_tracing);
+	if (!stat_dir)
+		pr_warning("Could not create debugfs "
+			   "'trace_stat' entry\n");
+	return 0;
+}
+fs_initcall(tracing_stat_init);
diff --git a/mm/slab.c b/mm/slab.c
index ddc41f3..dae716b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,6 +102,7 @@
 #include	<linux/cpu.h>
 #include	<linux/sysctl.h>
 #include	<linux/module.h>
+#include	<trace/kmemtrace.h>
 #include	<linux/rcupdate.h>
 #include	<linux/string.h>
 #include	<linux/uaccess.h>
@@ -568,6 +569,14 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
 
 #endif
 
+#ifdef CONFIG_KMEMTRACE
+size_t slab_buffer_size(struct kmem_cache *cachep)
+{
+	return cachep->buffer_size;
+}
+EXPORT_SYMBOL(slab_buffer_size);
+#endif
+
 /*
  * Do not go above this order unless 0 objects fit into the slab.
  */
@@ -3550,10 +3559,23 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
  */
 void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
-	return __cache_alloc(cachep, flags, __builtin_return_address(0));
+	void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
+
+	kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+			     obj_size(cachep), cachep->buffer_size, flags);
+
+	return ret;
 }
 EXPORT_SYMBOL(kmem_cache_alloc);
 
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
+{
+	return __cache_alloc(cachep, flags, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+#endif
+
 /**
  * kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
  * @cachep: the cache we're checking against
@@ -3598,23 +3620,47 @@ out:
 #ifdef CONFIG_NUMA
 void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
-	return __cache_alloc_node(cachep, flags, nodeid,
-			__builtin_return_address(0));
+	void *ret = __cache_alloc_node(cachep, flags, nodeid,
+				       __builtin_return_address(0));
+
+	kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+				  obj_size(cachep), cachep->buffer_size,
+				  flags, nodeid);
+
+	return ret;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
+				    gfp_t flags,
+				    int nodeid)
+{
+	return __cache_alloc_node(cachep, flags, nodeid,
+				  __builtin_return_address(0));
+}
+EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+#endif
+
 static __always_inline void *
 __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
 {
 	struct kmem_cache *cachep;
+	void *ret;
 
 	cachep = kmem_find_general_cachep(size, flags);
 	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
 		return cachep;
-	return kmem_cache_alloc_node(cachep, flags, node);
+	ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
+
+	kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+				  (unsigned long) caller, ret,
+				  size, cachep->buffer_size, flags, node);
+
+	return ret;
 }
 
-#ifdef CONFIG_DEBUG_SLAB
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
 void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	return __do_kmalloc_node(size, flags, node,
@@ -3647,6 +3693,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 					  void *caller)
 {
 	struct kmem_cache *cachep;
+	void *ret;
 
 	/* If you want to save a few bytes .text space: replace
 	 * __ with kmem_.
@@ -3656,11 +3703,17 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 	cachep = __find_general_cachep(size, flags);
 	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
 		return cachep;
-	return __cache_alloc(cachep, flags, caller);
+	ret = __cache_alloc(cachep, flags, caller);
+
+	kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
+			     (unsigned long) caller, ret,
+			     size, cachep->buffer_size, flags);
+
+	return ret;
 }
 
 
-#ifdef CONFIG_DEBUG_SLAB
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
 void *__kmalloc(size_t size, gfp_t flags)
 {
 	return __do_kmalloc(size, flags, __builtin_return_address(0));
@@ -3699,6 +3752,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 		debug_check_no_obj_freed(objp, obj_size(cachep));
 	__cache_free(cachep, objp);
 	local_irq_restore(flags);
+
+	kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, objp);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
@@ -3725,6 +3780,8 @@ void kfree(const void *objp)
 	debug_check_no_obj_freed(objp, obj_size(c));
 	__cache_free(c, (void *)objp);
 	local_irq_restore(flags);
+
+	kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, objp);
 }
 EXPORT_SYMBOL(kfree);
 
diff --git a/mm/slob.c b/mm/slob.c
index bf7e8fc..4d1c0fc 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -65,6 +65,7 @@
 #include <linux/module.h>
 #include <linux/rcupdate.h>
 #include <linux/list.h>
+#include <trace/kmemtrace.h>
 #include <asm/atomic.h>
 
 /*
@@ -463,27 +464,38 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
 {
 	unsigned int *m;
 	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+	void *ret;
 
 	if (size < PAGE_SIZE - align) {
 		if (!size)
 			return ZERO_SIZE_PTR;
 
 		m = slob_alloc(size + align, gfp, align, node);
+
 		if (!m)
 			return NULL;
 		*m = size;
-		return (void *)m + align;
+		ret = (void *)m + align;
+
+		kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+					  _RET_IP_, ret,
+					  size, size + align, gfp, node);
 	} else {
-		void *ret;
+		unsigned int order = get_order(size);
 
-		ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node);
+		ret = slob_new_page(gfp | __GFP_COMP, order, node);
 		if (ret) {
 			struct page *page;
 			page = virt_to_page(ret);
 			page->private = size;
 		}
-		return ret;
+
+		kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+					  _RET_IP_, ret,
+					  size, PAGE_SIZE << order, gfp, node);
 	}
+
+	return ret;
 }
 EXPORT_SYMBOL(__kmalloc_node);
 
@@ -501,6 +513,8 @@ void kfree(const void *block)
 		slob_free(m, *m + align);
 	} else
 		put_page(&sp->page);
+
+	kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, block);
 }
 EXPORT_SYMBOL(kfree);
 
@@ -569,10 +583,19 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 {
 	void *b;
 
-	if (c->size < PAGE_SIZE)
+	if (c->size < PAGE_SIZE) {
 		b = slob_alloc(c->size, flags, c->align, node);
-	else
+		kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
+					  _RET_IP_, b, c->size,
+					  SLOB_UNITS(c->size) * SLOB_UNIT,
+					  flags, node);
+	} else {
 		b = slob_new_page(flags, get_order(c->size), node);
+		kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
+					  _RET_IP_, b, c->size,
+					  PAGE_SIZE << get_order(c->size),
+					  flags, node);
+	}
 
 	if (c->ctor)
 		c->ctor(b);
@@ -608,6 +631,8 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
 	} else {
 		__kmem_cache_free(b, c->size);
 	}
+
+	kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, b);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
diff --git a/mm/slub.c b/mm/slub.c
index 6392ae5..f657c88 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <trace/kmemtrace.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/mempolicy.h>
@@ -1623,18 +1624,46 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 
 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
 {
-	return slab_alloc(s, gfpflags, -1, _RET_IP_);
+	void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);
+
+	kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+			     s->objsize, s->size, gfpflags);
+
+	return ret;
 }
 EXPORT_SYMBOL(kmem_cache_alloc);
 
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+{
+	return slab_alloc(s, gfpflags, -1, _RET_IP_);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+#endif
+
 #ifdef CONFIG_NUMA
 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
-	return slab_alloc(s, gfpflags, node, _RET_IP_);
+	void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
+
+	kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+				  s->objsize, s->size, gfpflags, node);
+
+	return ret;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 #endif
 
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+				    gfp_t gfpflags,
+				    int node)
+{
+	return slab_alloc(s, gfpflags, node, _RET_IP_);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+#endif
+
 /*
  * Slow patch handling. This may still be called frequently since objects
  * have a longer lifetime than the cpu slabs in most processing loads.
@@ -1742,6 +1771,8 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
 	page = virt_to_head_page(x);
 
 	slab_free(s, page, x, _RET_IP_);
+
+	kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, x);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
@@ -2657,6 +2688,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
 void *__kmalloc(size_t size, gfp_t flags)
 {
 	struct kmem_cache *s;
+	void *ret;
 
 	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, flags);
@@ -2666,7 +2698,12 @@ void *__kmalloc(size_t size, gfp_t flags)
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	return slab_alloc(s, flags, -1, _RET_IP_);
+	ret = slab_alloc(s, flags, -1, _RET_IP_);
+
+	kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
+			     size, s->size, flags);
+
+	return ret;
 }
 EXPORT_SYMBOL(__kmalloc);
 
@@ -2685,16 +2722,30 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
 void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	struct kmem_cache *s;
+	void *ret;
 
-	if (unlikely(size > PAGE_SIZE))
-		return kmalloc_large_node(size, flags, node);
+	if (unlikely(size > PAGE_SIZE)) {
+		ret = kmalloc_large_node(size, flags, node);
+
+		kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+					  _RET_IP_, ret,
+					  size, PAGE_SIZE << get_order(size),
+					  flags, node);
+
+		return ret;
+	}
 
 	s = get_slab(size, flags);
 
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	return slab_alloc(s, flags, node, _RET_IP_);
+	ret = slab_alloc(s, flags, node, _RET_IP_);
+
+	kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
+				  size, s->size, flags, node);
+
+	return ret;
 }
 EXPORT_SYMBOL(__kmalloc_node);
 #endif
@@ -2752,6 +2803,8 @@ void kfree(const void *x)
 		return;
 	}
 	slab_free(page->slab, page, object, _RET_IP_);
+
+	kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, x);
 }
 EXPORT_SYMBOL(kfree);
 
@@ -3221,6 +3274,7 @@ static struct notifier_block __cpuinitdata slab_notifier = {
 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 {
 	struct kmem_cache *s;
+	void *ret;
 
 	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, gfpflags);
@@ -3230,13 +3284,20 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	return slab_alloc(s, gfpflags, -1, caller);
+	ret = slab_alloc(s, gfpflags, -1, caller);
+
+	/* Honor the call site pointer we recieved. */
+	kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, caller, ret, size,
+			     s->size, gfpflags);
+
+	return ret;
 }
 
 void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 					int node, unsigned long caller)
 {
 	struct kmem_cache *s;
+	void *ret;
 
 	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large_node(size, gfpflags, node);
@@ -3246,7 +3307,13 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	return slab_alloc(s, gfpflags, node, caller);
+	ret = slab_alloc(s, gfpflags, node, caller);
+
+	/* Honor the call site pointer we recieved. */
+	kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, caller, ret,
+				  size, s->size, gfpflags, node);
+
+	return ret;
 }
 
 #ifdef CONFIG_SLUB_DEBUG

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [git pull] tracing fixes
@ 2008-11-29 19:34 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2008-11-29 19:34 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Andrew Morton, Steven Rostedt

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Lai Jiangshan (1):
      ftrace: prevent recursion

Liming Wang (1):
      function tracing: fix wrong position computing of stack_trace

Pekka Paalanen (2):
      x86, mmiotrace: fix buffer overrun detection
      tracing, doc: update mmiotrace documentation


 Documentation/tracers/mmiotrace.txt |    6 ++++--
 kernel/trace/ring_buffer.c          |    2 +-
 kernel/trace/trace_mmiotrace.c      |   16 +++++++---------
 kernel/trace/trace_stack.c          |   24 +++++++++++++++---------
 4 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt
index 5bbbe20..cde23b4 100644
--- a/Documentation/tracers/mmiotrace.txt
+++ b/Documentation/tracers/mmiotrace.txt
@@ -37,7 +37,7 @@ $ echo mmiotrace > /debug/tracing/current_tracer
 $ cat /debug/tracing/trace_pipe > mydump.txt &
 Start X or whatever.
 $ echo "X is up" > /debug/tracing/trace_marker
-$ echo none > /debug/tracing/current_tracer
+$ echo nop > /debug/tracing/current_tracer
 Check for lost events.
 
 
@@ -66,7 +66,7 @@ which action. It is recommended to place descriptive markers about what you
 do.
 
 Shut down mmiotrace (requires root privileges):
-$ echo none > /debug/tracing/current_tracer
+$ echo nop > /debug/tracing/current_tracer
 The 'cat' process exits. If it does not, kill it by issuing 'fg' command and
 pressing ctrl+c.
 
@@ -81,7 +81,9 @@ are:
 $ cat /debug/tracing/trace_entries
 gives you a number. Approximately double this number and write it back, for
 instance:
+$ echo 0 > /debug/tracing/tracing_enabled
 $ echo 128000 > /debug/tracing/trace_entries
+$ echo 1 > /debug/tracing/tracing_enabled
 Then start again from the top.
 
 If you are doing a trace for a driver project, e.g. Nouveau, you should also
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f780e95..668bbb5 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1215,7 +1215,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 
  out:
 	if (resched)
-		preempt_enable_notrace();
+		preempt_enable_no_resched_notrace();
 	else
 		preempt_enable_notrace();
 	return NULL;
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index f284846..e62cbf7 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -18,12 +18,14 @@ struct header_iter {
 
 static struct trace_array *mmio_trace_array;
 static bool overrun_detected;
+static unsigned long prev_overruns;
 
 static void mmio_reset_data(struct trace_array *tr)
 {
 	int cpu;
 
 	overrun_detected = false;
+	prev_overruns = 0;
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
@@ -128,16 +130,12 @@ static void mmio_close(struct trace_iterator *iter)
 
 static unsigned long count_overruns(struct trace_iterator *iter)
 {
-	int cpu;
 	unsigned long cnt = 0;
-/* FIXME: */
-#if 0
-	for_each_online_cpu(cpu) {
-		cnt += iter->overrun[cpu];
-		iter->overrun[cpu] = 0;
-	}
-#endif
-	(void)cpu;
+	unsigned long over = ring_buffer_overruns(iter->tr->buffer);
+
+	if (over > prev_overruns)
+		cnt = over - prev_overruns;
+	prev_overruns = over;
 	return cnt;
 }
 
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index be682b6..3bdb44b 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -184,11 +184,16 @@ static struct file_operations stack_max_size_fops = {
 static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	long i = (long)m->private;
+	long i;
 
 	(*pos)++;
 
-	i++;
+	if (v == SEQ_START_TOKEN)
+		i = 0;
+	else {
+		i = *(long *)v;
+		i++;
+	}
 
 	if (i >= max_stack_trace.nr_entries ||
 	    stack_dump_trace[i] == ULONG_MAX)
@@ -201,12 +206,15 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
-	void *t = &m->private;
+	void *t = SEQ_START_TOKEN;
 	loff_t l = 0;
 
 	local_irq_disable();
 	__raw_spin_lock(&max_stack_lock);
 
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
 	for (; t && l < *pos; t = t_next(m, t, &l))
 		;
 
@@ -235,10 +243,10 @@ static int trace_lookup_stack(struct seq_file *m, long i)
 
 static int t_show(struct seq_file *m, void *v)
 {
-	long i = *(long *)v;
+	long i;
 	int size;
 
-	if (i < 0) {
+	if (v == SEQ_START_TOKEN) {
 		seq_printf(m, "        Depth   Size      Location"
 			   "    (%d entries)\n"
 			   "        -----   ----      --------\n",
@@ -246,6 +254,8 @@ static int t_show(struct seq_file *m, void *v)
 		return 0;
 	}
 
+	i = *(long *)v;
+
 	if (i >= max_stack_trace.nr_entries ||
 	    stack_dump_trace[i] == ULONG_MAX)
 		return 0;
@@ -275,10 +285,6 @@ static int stack_trace_open(struct inode *inode, struct file *file)
 	int ret;
 
 	ret = seq_open(file, &stack_trace_seq_ops);
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-		m->private = (void *)-1;
-	}
 
 	return ret;
 }

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [git pull] tracing fixes
@ 2008-11-20 11:26 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2008-11-20 11:26 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Andrew Morton, Steven Rostedt

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Julia Lawall (1):
      tracing: kernel/trace/trace.c: introduce missing kfree()

Steven Rostedt (3):
      ftrace: fix set_ftrace_filter
      ftrace: make filtered functions effective on setting
      ftrace: fix dyn ftrace filter selection

Vegard Nossum (1):
      trace: introduce missing mutex_unlock()


 kernel/trace/ftrace.c      |  113 +++++++++++++++++++++-----------------------
 kernel/trace/ring_buffer.c |    1 +
 kernel/trace/trace.c       |    1 +
 3 files changed, 56 insertions(+), 59 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e602057..78db083 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -326,96 +326,89 @@ ftrace_record_ip(unsigned long ip)
 
 static int
 __ftrace_replace_code(struct dyn_ftrace *rec,
-		      unsigned char *old, unsigned char *new, int enable)
+		      unsigned char *nop, int enable)
 {
 	unsigned long ip, fl;
+	unsigned char *call, *old, *new;
 
 	ip = rec->ip;
 
-	if (ftrace_filtered && enable) {
+	/*
+	 * If this record is not to be traced and
+	 * it is not enabled then do nothing.
+	 *
+	 * If this record is not to be traced and
+	 * it is enabled then disabled it.
+	 *
+	 */
+	if (rec->flags & FTRACE_FL_NOTRACE) {
+		if (rec->flags & FTRACE_FL_ENABLED)
+			rec->flags &= ~FTRACE_FL_ENABLED;
+		else
+			return 0;
+
+	} else if (ftrace_filtered && enable) {
 		/*
-		 * If filtering is on:
-		 *
-		 * If this record is set to be filtered and
-		 * is enabled then do nothing.
-		 *
-		 * If this record is set to be filtered and
-		 * it is not enabled, enable it.
-		 *
-		 * If this record is not set to be filtered
-		 * and it is not enabled do nothing.
-		 *
-		 * If this record is set not to trace then
-		 * do nothing.
-		 *
-		 * If this record is set not to trace and
-		 * it is enabled then disable it.
-		 *
-		 * If this record is not set to be filtered and
-		 * it is enabled, disable it.
+		 * Filtering is on:
 		 */
 
-		fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE |
-				   FTRACE_FL_ENABLED);
+		fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
 
-		if ((fl ==  (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) ||
-		    (fl ==  (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) ||
-		    !fl || (fl == FTRACE_FL_NOTRACE))
+		/* Record is filtered and enabled, do nothing */
+		if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
 			return 0;
 
-		/*
-		 * If it is enabled disable it,
-		 * otherwise enable it!
-		 */
-		if (fl & FTRACE_FL_ENABLED) {
-			/* swap new and old */
-			new = old;
-			old = ftrace_call_replace(ip, FTRACE_ADDR);
+		/* Record is not filtered and is not enabled do nothing */
+		if (!fl)
+			return 0;
+
+		/* Record is not filtered but enabled, disable it */
+		if (fl == FTRACE_FL_ENABLED)
 			rec->flags &= ~FTRACE_FL_ENABLED;
-		} else {
-			new = ftrace_call_replace(ip, FTRACE_ADDR);
+		else
+		/* Otherwise record is filtered but not enabled, enable it */
 			rec->flags |= FTRACE_FL_ENABLED;
-		}
 	} else {
+		/* Disable or not filtered */
 
 		if (enable) {
-			/*
-			 * If this record is set not to trace and is
-			 * not enabled, do nothing.
-			 */
-			fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
-			if (fl == FTRACE_FL_NOTRACE)
-				return 0;
-
-			new = ftrace_call_replace(ip, FTRACE_ADDR);
-		} else
-			old = ftrace_call_replace(ip, FTRACE_ADDR);
-
-		if (enable) {
+			/* if record is enabled, do nothing */
 			if (rec->flags & FTRACE_FL_ENABLED)
 				return 0;
+
 			rec->flags |= FTRACE_FL_ENABLED;
+
 		} else {
+
+			/* if record is not enabled do nothing */
 			if (!(rec->flags & FTRACE_FL_ENABLED))
 				return 0;
+
 			rec->flags &= ~FTRACE_FL_ENABLED;
 		}
 	}
 
+	call = ftrace_call_replace(ip, FTRACE_ADDR);
+
+	if (rec->flags & FTRACE_FL_ENABLED) {
+		old = nop;
+		new = call;
+	} else {
+		old = call;
+		new = nop;
+	}
+
 	return ftrace_modify_code(ip, old, new);
 }
 
 static void ftrace_replace_code(int enable)
 {
 	int i, failed;
-	unsigned char *new = NULL, *old = NULL;
+	unsigned char *nop = NULL;
 	struct dyn_ftrace *rec;
 	struct ftrace_page *pg;
 
-	if (enable)
-		old = ftrace_nop_replace();
-	else
-		new = ftrace_nop_replace();
+	nop = ftrace_nop_replace();
 
 	for (pg = ftrace_pages_start; pg; pg = pg->next) {
 		for (i = 0; i < pg->index; i++) {
@@ -433,7 +426,7 @@ static void ftrace_replace_code(int enable)
 				unfreeze_record(rec);
 			}
 
-			failed = __ftrace_replace_code(rec, old, new, enable);
+			failed = __ftrace_replace_code(rec, nop, enable);
 			if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
 				rec->flags |= FTRACE_FL_FAILED;
 				if ((system_state == SYSTEM_BOOTING) ||
@@ -534,8 +527,7 @@ static void ftrace_startup(void)
 
 	mutex_lock(&ftrace_start_lock);
 	ftrace_start++;
-	if (ftrace_start == 1)
-		command |= FTRACE_ENABLE_CALLS;
+	command |= FTRACE_ENABLE_CALLS;
 
 	if (saved_ftrace_func != ftrace_trace_function) {
 		saved_ftrace_func = ftrace_trace_function;
@@ -734,6 +726,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 		    ((iter->flags & FTRACE_ITER_FAILURES) &&
 		     !(rec->flags & FTRACE_FL_FAILED)) ||
 
+		    ((iter->flags & FTRACE_ITER_FILTER) &&
+		     !(rec->flags & FTRACE_FL_FILTER)) ||
+
 		    ((iter->flags & FTRACE_ITER_NOTRACE) &&
 		     !(rec->flags & FTRACE_FL_NOTRACE))) {
 			rec = NULL;
@@ -1186,7 +1181,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
 
 	mutex_lock(&ftrace_sysctl_lock);
 	mutex_lock(&ftrace_start_lock);
-	if (iter->filtered && ftrace_start && ftrace_enabled)
+	if (ftrace_start && ftrace_enabled)
 		ftrace_run_update_code(FTRACE_ENABLE_CALLS);
 	mutex_unlock(&ftrace_start_lock);
 	mutex_unlock(&ftrace_sysctl_lock);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 036456c..f780e95 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -617,6 +617,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 		list_del_init(&page->list);
 		free_buffer_page(page);
 	}
+	mutex_unlock(&buffer->mutex);
 	return -ENOMEM;
 }
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 697eda3..d86e325 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1936,6 +1936,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
 	}
 	mutex_unlock(&trace_types_lock);
+	kfree(iter);
 
 	return ERR_PTR(-ENOMEM);
 }

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [git pull] tracing fixes
@ 2008-11-18 14:46 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2008-11-18 14:46 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Andrew Morton, Steven Rostedt

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Ingo Molnar (1):
      tracing: fix mmiotrace resizing crash

Rakib Mullick (1):
      kernel/profile.c: fix section mismatch warning

Steven Rostedt (2):
      ring-buffer: buffer record on/off switch
      ring-buffer: no preempt for sched_clock()

walimis (1):
      function tracing: fix wrong pos computing when read buffer has been fulfilled


 include/linux/ring_buffer.h |    3 +
 kernel/profile.c            |    2 +-
 kernel/trace/ftrace.c       |   34 ++++++------
 kernel/trace/ring_buffer.c  |  115 ++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 135 insertions(+), 19 deletions(-)

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 536b0ca..e097c2e 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -120,6 +120,9 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
 u64 ring_buffer_time_stamp(int cpu);
 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
 
+void tracing_on(void);
+void tracing_off(void);
+
 enum ring_buffer_flags {
 	RB_FL_OVERWRITE		= 1 << 0,
 };
diff --git a/kernel/profile.c b/kernel/profile.c
index 9830a03..5b7d1ac 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -544,7 +544,7 @@ static const struct file_operations proc_profile_operations = {
 };
 
 #ifdef CONFIG_SMP
-static void __init profile_nop(void *unused)
+static inline void profile_nop(void *unused)
 {
 }
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4a39d24..e602057 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -185,7 +185,6 @@ enum {
 };
 
 static int ftrace_filtered;
-static int tracing_on;
 
 static LIST_HEAD(ftrace_new_addrs);
 
@@ -506,13 +505,10 @@ static int __ftrace_modify_code(void *data)
 {
 	int *command = data;
 
-	if (*command & FTRACE_ENABLE_CALLS) {
+	if (*command & FTRACE_ENABLE_CALLS)
 		ftrace_replace_code(1);
-		tracing_on = 1;
-	} else if (*command & FTRACE_DISABLE_CALLS) {
+	else if (*command & FTRACE_DISABLE_CALLS)
 		ftrace_replace_code(0);
-		tracing_on = 0;
-	}
 
 	if (*command & FTRACE_UPDATE_TRACE_FUNC)
 		ftrace_update_ftrace_func(ftrace_trace_function);
@@ -677,7 +673,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
 
 	cnt = num_to_init / ENTRIES_PER_PAGE;
 	pr_info("ftrace: allocating %ld entries in %d pages\n",
-		num_to_init, cnt);
+		num_to_init, cnt + 1);
 
 	for (i = 0; i < cnt; i++) {
 		pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -757,13 +753,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 	void *p = NULL;
 	loff_t l = -1;
 
-	if (*pos != iter->pos) {
-		for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l))
-			;
-	} else {
-		l = *pos;
-		p = t_next(m, p, &l);
-	}
+	if (*pos > iter->pos)
+		*pos = iter->pos;
+
+	l = *pos;
+	p = t_next(m, p, &l);
 
 	return p;
 }
@@ -774,15 +768,21 @@ static void t_stop(struct seq_file *m, void *p)
 
 static int t_show(struct seq_file *m, void *v)
 {
+	struct ftrace_iterator *iter = m->private;
 	struct dyn_ftrace *rec = v;
 	char str[KSYM_SYMBOL_LEN];
+	int ret = 0;
 
 	if (!rec)
 		return 0;
 
 	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
 
-	seq_printf(m, "%s\n", str);
+	ret = seq_printf(m, "%s\n", str);
+	if (ret < 0) {
+		iter->pos--;
+		iter->idx--;
+	}
 
 	return 0;
 }
@@ -808,7 +808,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
 		return -ENOMEM;
 
 	iter->pg = ftrace_pages_start;
-	iter->pos = -1;
+	iter->pos = 0;
 
 	ret = seq_open(file, &show_ftrace_seq_ops);
 	if (!ret) {
@@ -895,7 +895,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
 
 	if (file->f_mode & FMODE_READ) {
 		iter->pg = ftrace_pages_start;
-		iter->pos = -1;
+		iter->pos = 0;
 		iter->flags = enable ? FTRACE_ITER_FILTER :
 			FTRACE_ITER_NOTRACE;
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2f76193..036456c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -16,14 +16,49 @@
 #include <linux/list.h>
 #include <linux/fs.h>
 
+#include "trace.h"
+
+/* Global flag to disable all recording to ring buffers */
+static int ring_buffers_off __read_mostly;
+
+/**
+ * tracing_on - enable all tracing buffers
+ *
+ * This function enables all tracing buffers that may have been
+ * disabled with tracing_off.
+ */
+void tracing_on(void)
+{
+	ring_buffers_off = 0;
+}
+
+/**
+ * tracing_off - turn off all tracing buffers
+ *
+ * This function stops all tracing buffers from recording data.
+ * It does not disable any overhead the tracers themselves may
+ * be causing. This function simply causes all recording to
+ * the ring buffers to fail.
+ */
+void tracing_off(void)
+{
+	ring_buffers_off = 1;
+}
+
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
 /* FIXME!!! */
 u64 ring_buffer_time_stamp(int cpu)
 {
+	u64 time;
+
+	preempt_disable_notrace();
 	/* shift to debug/test normalization and TIME_EXTENTS */
-	return sched_clock() << DEBUG_SHIFT;
+	time = sched_clock() << DEBUG_SHIFT;
+	preempt_enable_notrace();
+
+	return time;
 }
 
 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
@@ -503,6 +538,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 	LIST_HEAD(pages);
 	int i, cpu;
 
+	/*
+	 * Always succeed at resizing a non-existent buffer:
+	 */
+	if (!buffer)
+		return size;
+
 	size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	size *= BUF_PAGE_SIZE;
 	buffer_size = buffer->pages * BUF_PAGE_SIZE;
@@ -1133,6 +1174,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 	int cpu, resched;
 
+	if (ring_buffers_off)
+		return NULL;
+
 	if (atomic_read(&buffer->record_disabled))
 		return NULL;
 
@@ -1249,6 +1293,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	int ret = -EBUSY;
 	int cpu, resched;
 
+	if (ring_buffers_off)
+		return -EBUSY;
+
 	if (atomic_read(&buffer->record_disabled))
 		return -EBUSY;
 
@@ -2070,3 +2117,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
 	return 0;
 }
 
+static ssize_t
+rb_simple_read(struct file *filp, char __user *ubuf,
+	       size_t cnt, loff_t *ppos)
+{
+	int *p = filp->private_data;
+	char buf[64];
+	int r;
+
+	/* !ring_buffers_off == tracing_on */
+	r = sprintf(buf, "%d\n", !*p);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+rb_simple_write(struct file *filp, const char __user *ubuf,
+		size_t cnt, loff_t *ppos)
+{
+	int *p = filp->private_data;
+	char buf[64];
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	/* !ring_buffers_off == tracing_on */
+	*p = !val;
+
+	(*ppos)++;
+
+	return cnt;
+}
+
+static struct file_operations rb_simple_fops = {
+	.open		= tracing_open_generic,
+	.read		= rb_simple_read,
+	.write		= rb_simple_write,
+};
+
+
+static __init int rb_init_debugfs(void)
+{
+	struct dentry *d_tracer;
+	struct dentry *entry;
+
+	d_tracer = tracing_init_dentry();
+
+	entry = debugfs_create_file("tracing_on", 0644, d_tracer,
+				    &ring_buffers_off, &rb_simple_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs 'tracing_on' entry\n");
+
+	return 0;
+}
+
+fs_initcall(rb_init_debugfs);

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [git pull] tracing fixes
@ 2008-11-11 18:24 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2008-11-11 18:24 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Andrew Morton, Steven Rostedt

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

the large linecount comes from the ftrace.txt update, which was 
getting pretty stale so we took it a bit ahead.

 Thanks,

	Ingo

------------------>
Eric Anholt (1):
      ftrace: fix breakage in bin_fmt results

Steven Rostedt (4):
      ftrace: update txt document
      ftrace: ftrace.txt version update
      ftrace: disable tracing on resize
      ring-buffer: prevent infinite looping on time stamping


 Documentation/ftrace.txt   |  171 +++++++++++++++++++-------------------------
 kernel/trace/ring_buffer.c |    2 +-
 kernel/trace/trace.c       |   19 +++++-
 3 files changed, 91 insertions(+), 101 deletions(-)

diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
index ea5a827..9cc4d68 100644
--- a/Documentation/ftrace.txt
+++ b/Documentation/ftrace.txt
@@ -8,7 +8,7 @@ Copyright 2008 Red Hat Inc.
 Reviewers:   Elias Oltmanns, Randy Dunlap, Andrew Morton,
 	     John Kacur, and David Teigland.
 
-Written for: 2.6.27-rc1
+Written for: 2.6.28-rc2
 
 Introduction
 ------------
@@ -50,26 +50,26 @@ of ftrace. Here is a list of some of the key files:
 
  Note: all time values are in microseconds.
 
-  current_tracer : This is used to set or display the current tracer
+  current_tracer: This is used to set or display the current tracer
 		that is configured.
 
-  available_tracers : This holds the different types of tracers that
+  available_tracers: This holds the different types of tracers that
 		have been compiled into the kernel. The tracers
 		listed here can be configured by echoing their name
 		into current_tracer.
 
-  tracing_enabled : This sets or displays whether the current_tracer
+  tracing_enabled: This sets or displays whether the current_tracer
 		is activated and tracing or not. Echo 0 into this
 		file to disable the tracer or 1 to enable it.
 
-  trace : This file holds the output of the trace in a human readable
+  trace: This file holds the output of the trace in a human readable
 		format (described below).
 
-  latency_trace : This file shows the same trace but the information
+  latency_trace: This file shows the same trace but the information
 		is organized more to display possible latencies
 		in the system (described below).
 
-  trace_pipe : The output is the same as the "trace" file but this
+  trace_pipe: The output is the same as the "trace" file but this
 		file is meant to be streamed with live tracing.
 		Reads from this file will block until new data
 		is retrieved. Unlike the "trace" and "latency_trace"
@@ -82,11 +82,11 @@ of ftrace. Here is a list of some of the key files:
 		tracer is not adding more data, they will display
 		the same information every time they are read.
 
-  iter_ctrl : This file lets the user control the amount of data
+  iter_ctrl: This file lets the user control the amount of data
 		that is displayed in one of the above output
 		files.
 
-  trace_max_latency : Some of the tracers record the max latency.
+  trace_max_latency: Some of the tracers record the max latency.
 		For example, the time interrupts are disabled.
 		This time is saved in this file. The max trace
 		will also be stored, and displayed by either
@@ -94,29 +94,26 @@ of ftrace. Here is a list of some of the key files:
 		only be recorded if the latency is greater than
 		the value in this file. (in microseconds)
 
-  trace_entries : This sets or displays the number of trace
-		entries each CPU buffer can hold. The tracer buffers
-		are the same size for each CPU. The displayed number
-		is the size of the CPU buffer and not total size. The
+  trace_entries: This sets or displays the number of bytes each CPU
+		buffer can hold. The tracer buffers are the same size
+		for each CPU. The displayed number is the size of the
+		 CPU buffer and not total size of all buffers. The
 		trace buffers are allocated in pages (blocks of memory
 		that the kernel uses for allocation, usually 4 KB in size).
-		Since each entry is smaller than a page, if the last
-		allocated page has room for more entries than were
-		requested, the rest of the page is used to allocate
-		entries.
+		If the last page allocated has room for more bytes
+		than requested, the rest of the page will be used,
+		making the actual allocation bigger than requested.
+		(Note, the size may not be a multiple of the page size due
+		to buffer managment overhead.)
 
 		This can only be updated when the current_tracer
-		is set to "none".
+		is set to "nop".
 
-		NOTE: It is planned on changing the allocated buffers
-		      from being the number of possible CPUS to
-		      the number of online CPUS.
-
-  tracing_cpumask : This is a mask that lets the user only trace
+  tracing_cpumask: This is a mask that lets the user only trace
 		on specified CPUS. The format is a hex string
 		representing the CPUS.
 
-  set_ftrace_filter : When dynamic ftrace is configured in (see the
+  set_ftrace_filter: When dynamic ftrace is configured in (see the
 		section below "dynamic ftrace"), the code is dynamically
 		modified (code text rewrite) to disable calling of the
 		function profiler (mcount). This lets tracing be configured
@@ -130,14 +127,11 @@ of ftrace. Here is a list of some of the key files:
 		be traced. If a function exists in both set_ftrace_filter
 		and set_ftrace_notrace,	the function will _not_ be traced.
 
-  available_filter_functions : When a function is encountered the first
-		time by the dynamic tracer, it is recorded and
-		later the call is converted into a nop. This file
-		lists the functions that have been recorded
-		by the dynamic tracer and these functions can
-		be used to set the ftrace filter by the above
-		"set_ftrace_filter" file. (See the section "dynamic ftrace"
-		below for more details).
+  available_filter_functions: This lists the functions that ftrace
+		has processed and can trace. These are the function
+		names that you can pass to "set_ftrace_filter" or
+		"set_ftrace_notrace". (See the section "dynamic ftrace"
+		below for more details.)
 
 
 The Tracers
@@ -145,7 +139,7 @@ The Tracers
 
 Here is the list of current tracers that may be configured.
 
-  ftrace - function tracer that uses mcount to trace all functions.
+  function - function tracer that uses mcount to trace all functions.
 
   sched_switch - traces the context switches between tasks.
 
@@ -166,8 +160,8 @@ Here is the list of current tracers that may be configured.
 		the highest priority task to get scheduled after
 		it has been woken up.
 
-  none - This is not a tracer. To remove all tracers from tracing
-		simply echo "none" into current_tracer.
+  nop - This is not a tracer. To remove all tracers from tracing
+		simply echo "nop" into current_tracer.
 
 
 Examples of using the tracer
@@ -182,7 +176,7 @@ Output format:
 Here is an example of the output format of the file "trace"
 
                              --------
-# tracer: ftrace
+# tracer: function
 #
 #           TASK-PID   CPU#    TIMESTAMP  FUNCTION
 #              | |      |          |         |
@@ -192,7 +186,7 @@ Here is an example of the output format of the file "trace"
                              --------
 
 A header is printed with the tracer name that is represented by the trace.
-In this case the tracer is "ftrace". Then a header showing the format. Task
+In this case the tracer is "function". Then a header showing the format. Task
 name "bash", the task PID "4251", the CPU that it was running on
 "01", the timestamp in <secs>.<usecs> format, the function name that was
 traced "path_put" and the parent function that called this function
@@ -1003,22 +997,20 @@ is the stack for the hard interrupt. This hides the fact that NEED_RESCHED
 has been set. We do not see the 'N' until we switch back to the task's
 assigned stack.
 
-ftrace
-------
+function
+--------
 
-ftrace is not only the name of the tracing infrastructure, but it
-is also a name of one of the tracers. The tracer is the function
-tracer. Enabling the function tracer can be done from the
-debug file system. Make sure the ftrace_enabled is set otherwise
-this tracer is a nop.
+This tracer is the function tracer. Enabling the function tracer
+can be done from the debug file system. Make sure the ftrace_enabled is
+set; otherwise this tracer is a nop.
 
  # sysctl kernel.ftrace_enabled=1
- # echo ftrace > /debug/tracing/current_tracer
+ # echo function > /debug/tracing/current_tracer
  # echo 1 > /debug/tracing/tracing_enabled
  # usleep 1
  # echo 0 > /debug/tracing/tracing_enabled
  # cat /debug/tracing/trace
-# tracer: ftrace
+# tracer: function
 #
 #           TASK-PID   CPU#    TIMESTAMP  FUNCTION
 #              | |      |          |         |
@@ -1040,10 +1032,10 @@ this tracer is a nop.
 [...]
 
 
-Note: ftrace uses ring buffers to store the above entries. The newest data
-may overwrite the oldest data. Sometimes using echo to stop the trace
-is not sufficient because the tracing could have overwritten the data
-that you wanted to record. For this reason, it is sometimes better to
+Note: function tracer uses ring buffers to store the above entries.
+The newest data may overwrite the oldest data. Sometimes using echo to
+stop the trace is not sufficient because the tracing could have overwritten
+the data that you wanted to record. For this reason, it is sometimes better to
 disable tracing directly from a program. This allows you to stop the
 tracing at the point that you hit the part that you are interested in.
 To disable the tracing directly from a C program, something like following
@@ -1077,18 +1069,31 @@ every kernel function, produced by the -pg switch in gcc), starts
 of pointing to a simple return. (Enabling FTRACE will include the
 -pg switch in the compiling of the kernel.)
 
-When dynamic ftrace is initialized, it calls kstop_machine to make
-the machine act like a uniprocessor so that it can freely modify code
-without worrying about other processors executing that same code.  At
-initialization, the mcount calls are changed to call a "record_ip"
-function.  After this, the first time a kernel function is called,
-it has the calling address saved in a hash table.
-
-Later on the ftraced kernel thread is awoken and will again call
-kstop_machine if new functions have been recorded. The ftraced thread
-will change all calls to mcount to "nop".  Just calling mcount
-and having mcount return has shown a 10% overhead. By converting
-it to a nop, there is no measurable overhead to the system.
+At compile time every C file object is run through the
+recordmcount.pl script (located in the scripts directory). This
+script will process the C object using objdump to find all the
+locations in the .text section that call mcount. (Note, only
+the .text section is processed, since processing other sections
+like .init.text may cause races due to those sections being freed).
+
+A new section called "__mcount_loc" is created that holds references
+to all the mcount call sites in the .text section. This section is
+compiled back into the original object. The final linker will add
+all these references into a single table.
+
+On boot up, before SMP is initialized, the dynamic ftrace code
+scans this table and updates all the locations into nops. It also
+records the locations, which are added to the available_filter_functions
+list.  Modules are processed as they are loaded and before they are
+executed.  When a module is unloaded, it also removes its functions from
+the ftrace function list. This is automatic in the module unload
+code, and the module author does not need to worry about it.
+
+When tracing is enabled, kstop_machine is called to prevent races
+with the CPUS executing code being modified (which can cause the
+CPU to do undesireable things), and the nops are patched back
+to calls. But this time, they do not call mcount (which is just
+a function stub). They now call into the ftrace infrastructure.
 
 One special side-effect to the recording of the functions being
 traced is that we can now selectively choose which functions we
@@ -1251,36 +1256,6 @@ Produces:
 
 We can see that there's no more lock or preempt tracing.
 
-ftraced
--------
-
-As mentioned above, when dynamic ftrace is configured in, a kernel
-thread wakes up once a second and checks to see if there are mcount
-calls that need to be converted into nops. If there are not any, then
-it simply goes back to sleep. But if there are some, it will call
-kstop_machine to convert the calls to nops.
-
-There may be a case in which you do not want this added latency.
-Perhaps you are doing some audio recording and this activity might
-cause skips in the playback. There is an interface to disable
-and enable the "ftraced" kernel thread.
-
- # echo 0 > /debug/tracing/ftraced_enabled
-
-This will disable the calling of kstop_machine to update the
-mcount calls to nops. Remember that there is a large overhead
-to calling mcount. Without this kernel thread, that overhead will
-exist.
-
-If there are recorded calls to mcount, any write to the ftraced_enabled
-file will cause the kstop_machine to run. This means that a
-user can manually perform the updates when they want to by simply
-echoing a '0' into the ftraced_enabled file.
-
-The updates are also done at the beginning of enabling a tracer
-that uses ftrace function recording.
-
-
 trace_pipe
 ----------
 
@@ -1289,14 +1264,14 @@ on the tracing is different. Every read from trace_pipe is consumed.
 This means that subsequent reads will be different. The trace
 is live.
 
- # echo ftrace > /debug/tracing/current_tracer
+ # echo function > /debug/tracing/current_tracer
  # cat /debug/tracing/trace_pipe > /tmp/trace.out &
 [1] 4153
  # echo 1 > /debug/tracing/tracing_enabled
  # usleep 1
  # echo 0 > /debug/tracing/tracing_enabled
  # cat /debug/tracing/trace
-# tracer: ftrace
+# tracer: function
 #
 #           TASK-PID   CPU#    TIMESTAMP  FUNCTION
 #              | |      |          |         |
@@ -1317,7 +1292,7 @@ is live.
 
 Note, reading the trace_pipe file will block until more input is added.
 By changing the tracer, trace_pipe will issue an EOF. We needed
-to set the ftrace tracer _before_ cating the trace_pipe file.
+to set the function tracer _before_ we "cat" the trace_pipe file.
 
 
 trace entries
@@ -1334,10 +1309,10 @@ number of entries.
 65620
 
 Note, to modify this, you must have tracing completely disabled. To do that,
-echo "none" into the current_tracer. If the current_tracer is not set
-to "none", an EINVAL error will be returned.
+echo "nop" into the current_tracer. If the current_tracer is not set
+to "nop", an EINVAL error will be returned.
 
- # echo none > /debug/tracing/current_tracer
+ # echo nop > /debug/tracing/current_tracer
  # echo 100000 > /debug/tracing/trace_entries
  # cat /debug/tracing/trace_entries
 100045
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3f33806..2f76193 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1060,7 +1060,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
 		/* Did the write stamp get updated already? */
 		if (unlikely(ts < cpu_buffer->write_stamp))
-			goto again;
+			delta = 0;
 
 		if (test_time_stamp(delta)) {
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9f3b478..697eda3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1755,7 +1755,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 		return TRACE_TYPE_HANDLED;
 
 	SEQ_PUT_FIELD_RET(s, entry->pid);
-	SEQ_PUT_FIELD_RET(s, iter->cpu);
+	SEQ_PUT_FIELD_RET(s, entry->cpu);
 	SEQ_PUT_FIELD_RET(s, iter->ts);
 
 	switch (entry->type) {
@@ -2676,7 +2676,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 {
 	unsigned long val;
 	char buf[64];
-	int ret;
+	int ret, cpu;
 	struct trace_array *tr = filp->private_data;
 
 	if (cnt >= sizeof(buf))
@@ -2704,6 +2704,14 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 		goto out;
 	}
 
+	/* disable all cpu buffers */
+	for_each_tracing_cpu(cpu) {
+		if (global_trace.data[cpu])
+			atomic_inc(&global_trace.data[cpu]->disabled);
+		if (max_tr.data[cpu])
+			atomic_inc(&max_tr.data[cpu]->disabled);
+	}
+
 	if (val != global_trace.entries) {
 		ret = ring_buffer_resize(global_trace.buffer, val);
 		if (ret < 0) {
@@ -2735,6 +2743,13 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 	if (tracing_disabled)
 		cnt = -ENOMEM;
  out:
+	for_each_tracing_cpu(cpu) {
+		if (global_trace.data[cpu])
+			atomic_dec(&global_trace.data[cpu]->disabled);
+		if (max_tr.data[cpu])
+			atomic_dec(&max_tr.data[cpu]->disabled);
+	}
+
 	max_tr.entries = global_trace.entries;
 	mutex_unlock(&trace_types_lock);
 

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [git pull] tracing fixes
@ 2008-11-03 18:03 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2008-11-03 18:03 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Andrew Morton, Steven Rostedt

Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

 Thanks,

	Ingo

------------------>
Al Viro (1):
      tracing, alpha: undefined reference to `save_stack_trace'

Steven Rostedt (2):
      ftrace: use kretprobe trampoline name to test in output
      tracing, ring-buffer: add paranoid checks for loops


 kernel/trace/Kconfig       |    2 +-
 kernel/trace/ring_buffer.c |   56 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.c       |   41 ++++++++++++++++++--------------
 3 files changed, 80 insertions(+), 19 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b58f43b..33dbefd 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -25,7 +25,7 @@ config TRACING
 	bool
 	select DEBUG_FS
 	select RING_BUFFER
-	select STACKTRACE
+	select STACKTRACE if STACKTRACE_SUPPORT
 	select TRACEPOINTS
 	select NOP_TRACER
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cedf4e2..3f33806 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1022,8 +1022,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	struct ring_buffer_event *event;
 	u64 ts, delta;
 	int commit = 0;
+	int nr_loops = 0;
 
  again:
+	/*
+	 * We allow for interrupts to reenter here and do a trace.
+	 * If one does, it will cause this original code to loop
+	 * back here. Even with heavy interrupts happening, this
+	 * should only happen a few times in a row. If this happens
+	 * 1000 times in a row, there must be either an interrupt
+	 * storm or we have something buggy.
+	 * Bail!
+	 */
+	if (unlikely(++nr_loops > 1000)) {
+		RB_WARN_ON(cpu_buffer, 1);
+		return NULL;
+	}
+
 	ts = ring_buffer_time_stamp(cpu_buffer->cpu);
 
 	/*
@@ -1532,10 +1547,23 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *reader = NULL;
 	unsigned long flags;
+	int nr_loops = 0;
 
 	spin_lock_irqsave(&cpu_buffer->lock, flags);
 
  again:
+	/*
+	 * This should normally only loop twice. But because the
+	 * start of the reader inserts an empty page, it causes
+	 * a case where we will loop three times. There should be no
+	 * reason to loop four times (that I know of).
+	 */
+	if (unlikely(++nr_loops > 3)) {
+		RB_WARN_ON(cpu_buffer, 1);
+		reader = NULL;
+		goto out;
+	}
+
 	reader = cpu_buffer->reader_page;
 
 	/* If there's more to read, return this page */
@@ -1665,6 +1693,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
 	struct buffer_page *reader;
+	int nr_loops = 0;
 
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return NULL;
@@ -1672,6 +1701,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	cpu_buffer = buffer->buffers[cpu];
 
  again:
+	/*
+	 * We repeat when a timestamp is encountered. It is possible
+	 * to get multiple timestamps from an interrupt entering just
+	 * as one timestamp is about to be written. The max times
+	 * that this can happen is the number of nested interrupts we
+	 * can have.  Nesting 10 deep of interrupts is clearly
+	 * an anomaly.
+	 */
+	if (unlikely(++nr_loops > 10)) {
+		RB_WARN_ON(cpu_buffer, 1);
+		return NULL;
+	}
+
 	reader = rb_get_reader_page(cpu_buffer);
 	if (!reader)
 		return NULL;
@@ -1722,6 +1764,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	struct ring_buffer *buffer;
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
+	int nr_loops = 0;
 
 	if (ring_buffer_iter_empty(iter))
 		return NULL;
@@ -1730,6 +1773,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	buffer = cpu_buffer->buffer;
 
  again:
+	/*
+	 * We repeat when a timestamp is encountered. It is possible
+	 * to get multiple timestamps from an interrupt entering just
+	 * as one timestamp is about to be written. The max times
+	 * that this can happen is the number of nested interrupts we
+	 * can have. Nesting 10 deep of interrupts is clearly
+	 * an anomaly.
+	 */
+	if (unlikely(++nr_loops > 10)) {
+		RB_WARN_ON(cpu_buffer, 1);
+		return NULL;
+	}
+
 	if (rb_per_cpu_empty(cpu_buffer))
 		return NULL;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8a499e2..9f3b478 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -705,6 +705,7 @@ static void ftrace_trace_stack(struct trace_array *tr,
 			       unsigned long flags,
 			       int skip, int pc)
 {
+#ifdef CONFIG_STACKTRACE
 	struct ring_buffer_event *event;
 	struct stack_entry *entry;
 	struct stack_trace trace;
@@ -730,6 +731,7 @@ static void ftrace_trace_stack(struct trace_array *tr,
 
 	save_stack_trace(&trace);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+#endif
 }
 
 void __trace_stack(struct trace_array *tr,
@@ -1086,17 +1088,20 @@ static void s_stop(struct seq_file *m, void *p)
 	mutex_unlock(&trace_types_lock);
 }
 
-#define KRETPROBE_MSG "[unknown/kretprobe'd]"
-
 #ifdef CONFIG_KRETPROBES
-static inline int kretprobed(unsigned long addr)
+static inline const char *kretprobed(const char *name)
 {
-	return addr == (unsigned long)kretprobe_trampoline;
+	static const char tramp_name[] = "kretprobe_trampoline";
+	int size = sizeof(tramp_name);
+
+	if (strncmp(tramp_name, name, size) == 0)
+		return "[unknown/kretprobe'd]";
+	return name;
 }
 #else
-static inline int kretprobed(unsigned long addr)
+static inline const char *kretprobed(const char *name)
 {
-	return 0;
+	return name;
 }
 #endif /* CONFIG_KRETPROBES */
 
@@ -1105,10 +1110,13 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
 {
 #ifdef CONFIG_KALLSYMS
 	char str[KSYM_SYMBOL_LEN];
+	const char *name;
 
 	kallsyms_lookup(address, NULL, NULL, NULL, str);
 
-	return trace_seq_printf(s, fmt, str);
+	name = kretprobed(str);
+
+	return trace_seq_printf(s, fmt, name);
 #endif
 	return 1;
 }
@@ -1119,9 +1127,12 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
 {
 #ifdef CONFIG_KALLSYMS
 	char str[KSYM_SYMBOL_LEN];
+	const char *name;
 
 	sprint_symbol(str, address);
-	return trace_seq_printf(s, fmt, str);
+	name = kretprobed(str);
+
+	return trace_seq_printf(s, fmt, name);
 #endif
 	return 1;
 }
@@ -1375,10 +1386,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 
 		seq_print_ip_sym(s, field->ip, sym_flags);
 		trace_seq_puts(s, " (");
-		if (kretprobed(field->parent_ip))
-			trace_seq_puts(s, KRETPROBE_MSG);
-		else
-			seq_print_ip_sym(s, field->parent_ip, sym_flags);
+		seq_print_ip_sym(s, field->parent_ip, sym_flags);
 		trace_seq_puts(s, ")\n");
 		break;
 	}
@@ -1494,12 +1502,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 			ret = trace_seq_printf(s, " <-");
 			if (!ret)
 				return TRACE_TYPE_PARTIAL_LINE;
-			if (kretprobed(field->parent_ip))
-				ret = trace_seq_puts(s, KRETPROBE_MSG);
-			else
-				ret = seq_print_ip_sym(s,
-						       field->parent_ip,
-						       sym_flags);
+			ret = seq_print_ip_sym(s,
+					       field->parent_ip,
+					       sym_flags);
 			if (!ret)
 				return TRACE_TYPE_PARTIAL_LINE;
 		}

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [git pull] tracing fixes
@ 2008-08-28 13:31 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2008-08-28 13:31 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Steven Rostedt, Andrew Morton


Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

Thanks,

	Ingo

------------------>
Rafael J. Wysocki (1):
      ftrace: disable tracing for hibernation


 kernel/power/disk.c |   13 ++++++++++---
 1 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index f011e08..bbd85c6 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -21,6 +21,7 @@
 #include <linux/console.h>
 #include <linux/cpu.h>
 #include <linux/freezer.h>
+#include <linux/ftrace.h>
 
 #include "power.h"
 
@@ -255,7 +256,7 @@ static int create_image(int platform_mode)
 
 int hibernation_snapshot(int platform_mode)
 {
-	int error;
+	int error, ftrace_save;
 
 	/* Free memory before shutting down devices. */
 	error = swsusp_shrink_memory();
@@ -267,6 +268,7 @@ int hibernation_snapshot(int platform_mode)
 		goto Close;
 
 	suspend_console();
+	ftrace_save = __ftrace_enabled_save();
 	error = device_suspend(PMSG_FREEZE);
 	if (error)
 		goto Recover_platform;
@@ -296,6 +298,7 @@ int hibernation_snapshot(int platform_mode)
  Resume_devices:
 	device_resume(in_suspend ?
 		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
+	__ftrace_enabled_restore(ftrace_save);
 	resume_console();
  Close:
 	platform_end(platform_mode);
@@ -366,10 +369,11 @@ static int resume_target_kernel(void)
 
 int hibernation_restore(int platform_mode)
 {
-	int error;
+	int error, ftrace_save;
 
 	pm_prepare_console();
 	suspend_console();
+	ftrace_save = __ftrace_enabled_save();
 	error = device_suspend(PMSG_QUIESCE);
 	if (error)
 		goto Finish;
@@ -384,6 +388,7 @@ int hibernation_restore(int platform_mode)
 	platform_restore_cleanup(platform_mode);
 	device_resume(PMSG_RECOVER);
  Finish:
+	__ftrace_enabled_restore(ftrace_save);
 	resume_console();
 	pm_restore_console();
 	return error;
@@ -396,7 +401,7 @@ int hibernation_restore(int platform_mode)
 
 int hibernation_platform_enter(void)
 {
-	int error;
+	int error, ftrace_save;
 
 	if (!hibernation_ops)
 		return -ENOSYS;
@@ -411,6 +416,7 @@ int hibernation_platform_enter(void)
 		goto Close;
 
 	suspend_console();
+	ftrace_save = __ftrace_enabled_save();
 	error = device_suspend(PMSG_HIBERNATE);
 	if (error) {
 		if (hibernation_ops->recover)
@@ -445,6 +451,7 @@ int hibernation_platform_enter(void)
 	hibernation_ops->finish();
  Resume_devices:
 	device_resume(PMSG_RESTORE);
+	__ftrace_enabled_restore(ftrace_save);
 	resume_console();
  Close:
 	hibernation_ops->end();

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [git pull] tracing fixes
@ 2008-07-26 19:52 Ingo Molnar
  0 siblings, 0 replies; 53+ messages in thread
From: Ingo Molnar @ 2008-07-26 19:52 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Steven Rostedt, Andrew Morton


Linus,

Please pull the latest tracing-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

Thanks,

	Ingo

------------------>
Ingo Molnar (2):
      ftrace: remove latency-tracer leftover
      ftrace: fix modular build

Steven Rostedt (3):
      ftrace: fix 4d3702b6 (post-v2.6.26): WARNING: at kernel/lockdep.c:2731 check_flags (ftrace)
      ftrace: only trace preempt off with preempt tracer
      ftrace: disable tracing on acpi idle calls


 drivers/acpi/processor_idle.c     |    6 ++++++
 kernel/trace/trace.c              |    3 ---
 kernel/trace/trace_irqsoff.c      |    8 ++++++--
 kernel/trace/trace_sched_wakeup.c |   27 ++++++++++++++++-----------
 4 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index d592dbb..b7f2963 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -272,6 +272,8 @@ static atomic_t c3_cpu_count;
 /* Common C-state entry for C2, C3, .. */
 static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
 {
+	/* Don't trace irqs off for idle */
+	stop_critical_timings();
 	if (cstate->entry_method == ACPI_CSTATE_FFH) {
 		/* Call into architectural FFH based C-state */
 		acpi_processor_ffh_cstate_enter(cstate);
@@ -284,6 +286,7 @@ static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
 		   gets asserted in time to freeze execution properly. */
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
+	start_critical_timings();
 }
 #endif /* !CONFIG_CPU_IDLE */
 
@@ -1418,6 +1421,8 @@ static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr,
  */
 static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 {
+	/* Don't trace irqs off for idle */
+	stop_critical_timings();
 	if (cx->entry_method == ACPI_CSTATE_FFH) {
 		/* Call into architectural FFH based C-state */
 		acpi_processor_ffh_cstate_enter(cx);
@@ -1432,6 +1437,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 		   gets asserted in time to freeze execution properly. */
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
+	start_critical_timings();
 }
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 868e121..fc20e09 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1203,9 +1203,6 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
 
 	iter->pos = *pos;
 
-	if (last_ent && !ent)
-		seq_puts(m, "\n\nvim:ft=help\n");
-
 	return ent;
 }
 
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 421d6fe..ece6cfb 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -253,12 +253,14 @@ void start_critical_timings(void)
 	if (preempt_trace() || irq_trace())
 		start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
 }
+EXPORT_SYMBOL_GPL(start_critical_timings);
 
 void stop_critical_timings(void)
 {
 	if (preempt_trace() || irq_trace())
 		stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
 }
+EXPORT_SYMBOL_GPL(stop_critical_timings);
 
 #ifdef CONFIG_IRQSOFF_TRACER
 #ifdef CONFIG_PROVE_LOCKING
@@ -337,12 +339,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
 #ifdef CONFIG_PREEMPT_TRACER
 void trace_preempt_on(unsigned long a0, unsigned long a1)
 {
-	stop_critical_timing(a0, a1);
+	if (preempt_trace())
+		stop_critical_timing(a0, a1);
 }
 
 void trace_preempt_off(unsigned long a0, unsigned long a1)
 {
-	start_critical_timing(a0, a1);
+	if (preempt_trace())
+		start_critical_timing(a0, a1);
 }
 #endif /* CONFIG_PREEMPT_TRACER */
 
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3c8d61d..e303ccb 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -26,7 +26,8 @@ static struct task_struct	*wakeup_task;
 static int			wakeup_cpu;
 static unsigned			wakeup_prio = -1;
 
-static DEFINE_SPINLOCK(wakeup_lock);
+static raw_spinlock_t wakeup_lock =
+	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 
 static void __wakeup_reset(struct trace_array *tr);
 
@@ -56,7 +57,8 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
 	if (unlikely(disabled != 1))
 		goto out;
 
-	spin_lock_irqsave(&wakeup_lock, flags);
+	local_irq_save(flags);
+	__raw_spin_lock(&wakeup_lock);
 
 	if (unlikely(!wakeup_task))
 		goto unlock;
@@ -71,7 +73,8 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
 	trace_function(tr, data, ip, parent_ip, flags);
 
  unlock:
-	spin_unlock_irqrestore(&wakeup_lock, flags);
+	__raw_spin_unlock(&wakeup_lock);
+	local_irq_restore(flags);
 
  out:
 	atomic_dec(&data->disabled);
@@ -145,7 +148,8 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
 	if (likely(disabled != 1))
 		goto out;
 
-	spin_lock_irqsave(&wakeup_lock, flags);
+	local_irq_save(flags);
+	__raw_spin_lock(&wakeup_lock);
 
 	/* We could race with grabbing wakeup_lock */
 	if (unlikely(!tracer_enabled || next != wakeup_task))
@@ -174,7 +178,8 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
 
 out_unlock:
 	__wakeup_reset(tr);
-	spin_unlock_irqrestore(&wakeup_lock, flags);
+	__raw_spin_unlock(&wakeup_lock);
+	local_irq_restore(flags);
 out:
 	atomic_dec(&tr->data[cpu]->disabled);
 }
@@ -209,8 +214,6 @@ static void __wakeup_reset(struct trace_array *tr)
 	struct trace_array_cpu *data;
 	int cpu;
 
-	assert_spin_locked(&wakeup_lock);
-
 	for_each_possible_cpu(cpu) {
 		data = tr->data[cpu];
 		tracing_reset(data);
@@ -229,9 +232,11 @@ static void wakeup_reset(struct trace_array *tr)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&wakeup_lock, flags);
+	local_irq_save(flags);
+	__raw_spin_lock(&wakeup_lock);
 	__wakeup_reset(tr);
-	spin_unlock_irqrestore(&wakeup_lock, flags);
+	__raw_spin_unlock(&wakeup_lock);
+	local_irq_restore(flags);
 }
 
 static void
@@ -252,7 +257,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
 		goto out;
 
 	/* interrupts should be off from try_to_wake_up */
-	spin_lock(&wakeup_lock);
+	__raw_spin_lock(&wakeup_lock);
 
 	/* check for races. */
 	if (!tracer_enabled || p->prio >= wakeup_prio)
@@ -274,7 +279,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
 		       CALLER_ADDR1, CALLER_ADDR2, flags);
 
 out_locked:
-	spin_unlock(&wakeup_lock);
+	__raw_spin_unlock(&wakeup_lock);
 out:
 	atomic_dec(&tr->data[cpu]->disabled);
 }

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: [git pull] tracing fixes
  2008-07-18 10:35     ` Ingo Molnar
@ 2008-07-19  1:18       ` Steven Rostedt
  0 siblings, 0 replies; 53+ messages in thread
From: Steven Rostedt @ 2008-07-19  1:18 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, linux-kernel, Thomas Gleixner



On Fri, 18 Jul 2008, Ingo Molnar wrote:
> ok, figured it out today: the lockups were due to the NMI watchdog and a
> missing NMI protection in cpu_clock(). I've reactivated the topic that
> solves this problem area and it all works fine now.
>
> the sched.o change probably made a difference just because it reduced
> the cross section between the NMI watchdog and the scheduler, making
> lockups less likely during the ftrace self-test. I'll revert it once the
> tracing/nmisafe is upstream.

Ingo,

Thanks a lot for looking into this.

-- Steve


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [git pull] tracing fixes
  2008-07-18  8:41   ` Ingo Molnar
@ 2008-07-18 10:35     ` Ingo Molnar
  2008-07-19  1:18       ` Steven Rostedt
  0 siblings, 1 reply; 53+ messages in thread
From: Ingo Molnar @ 2008-07-18 10:35 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Linus Torvalds, linux-kernel, Thomas Gleixner


* Ingo Molnar <mingo@elte.hu> wrote:

> > >  CFLAGS_REMOVE_sched_clock.o = -pg
> > > +CFLAGS_REMOVE_sched.o = -mno-spe -pg
> > >  endif
> > >
> > 
> > Ingo,
> > 
> > Why not trace the scheduler functions? I found a lot of useful 
> > information from seeing what functions are being called (namely the 
> > latencies caused by the fair scheduler balancing). Not being able to 
> > trace sched.c seems to keep a lot of useful data from being accessed.
> 
> i agree in general, but it was causing lockups with:
> 
>       http://redhat.com/~mingo/misc/config-Thu_Jul_17_13_34_52_CEST_2008
> 
> note the MAXSMP in the config which sets NR_CPUS to 4096:
> 
>       CONFIG_NR_CPUS=4096
> 
> our randconfig testing stumbled on it. That is a debug helper to "tune 
> up the kernel for as large systems as possible" and can bring in 
> regressions not normally seen.

ok, figured it out today: the lockups were due to the NMI watchdog and a 
missing NMI protection in cpu_clock(). I've reactivated the topic that 
solves this problem area and it all works fine now.

the sched.o change probably made a difference just because it reduced 
the cross section between the NMI watchdog and the scheduler, making 
lockups less likely during the ftrace self-test. I'll revert it once the 
tracing/nmisafe is upstream.

	Ingo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [git pull] tracing fixes
  2008-07-18  2:52 ` Steven Rostedt
  2008-07-18  3:02   ` Steven Rostedt
@ 2008-07-18  8:41   ` Ingo Molnar
  2008-07-18 10:35     ` Ingo Molnar
  1 sibling, 1 reply; 53+ messages in thread
From: Ingo Molnar @ 2008-07-18  8:41 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Linus Torvalds, linux-kernel, Thomas Gleixner


* Steven Rostedt <rostedt@goodmis.org> wrote:

> On Thu, 17 Jul 2008, Ingo Molnar wrote:
> >
> > Ingo Molnar (4):
> >       ftrace: fix merge buglet
> >       ftrace: fix lockup with MAXSMP
> >       ftrace: do not trace scheduler functions
> >       ftrace: do not trace library functions
> >
> 
> [...]
> > --- a/kernel/Makefile
> > +++ b/kernel/Makefile
> > @@ -11,8 +11,6 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
> >  	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
> >  	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o
> >
> > -CFLAGS_REMOVE_sched.o = -mno-spe
> > -
> >  ifdef CONFIG_FTRACE
> >  # Do not trace debug files and internal ftrace files
> >  CFLAGS_REMOVE_lockdep.o = -pg
> > @@ -21,6 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
> >  CFLAGS_REMOVE_rtmutex-debug.o = -pg
> >  CFLAGS_REMOVE_cgroup-debug.o = -pg
> >  CFLAGS_REMOVE_sched_clock.o = -pg
> > +CFLAGS_REMOVE_sched.o = -mno-spe -pg
> >  endif
> >
> 
> Ingo,
> 
> Why not trace the scheduler functions? I found a lot of useful 
> information from seeing what functions are being called (namely the 
> latencies caused by the fair scheduler balancing). Not being able to 
> trace sched.c seems to keep a lot of useful data from being accessed.

i agree in general, but it was causing lockups with:

      http://redhat.com/~mingo/misc/config-Thu_Jul_17_13_34_52_CEST_2008

note the MAXSMP in the config which sets NR_CPUS to 4096:

      CONFIG_NR_CPUS=4096

our randconfig testing stumbled on it. That is a debug helper to "tune 
up the kernel for as large systems as possible" and can bring in 
regressions not normally seen.

after i spent a good 4 hours on figuring out the lib/*.o details i didnt 
have the stamina to find the exact reason within sched.o :-)

One thing that needs looking at is that ftrace's self-recursion checks 
are not as robust as they used to be, and this is a recent regression 
(as in: last 1-2 weeks). Why do we have to exclude tsc.o from tracing 
for example? Why isnt cpu_clock() called inside a recursion-protected 
section? Why are all the trace function callbacks called outside of 
recursion checks? Why arent ftrace lockups debuggable via the NMI 
watchdog + early printk? I think it would be more robust to do a 
recursion check ASAP.

> also, is the '-mno-spe' safe when ftrace is not configured?

Why was the -mno-spe added exactly? I havent seen it explained in the 
commit that added its removal:

| commit 6ec562328fda585be2d7f472cfac99d3b44d362a
| Author: Steven Rostedt <rostedt@goodmis.org>
| Date:   Wed May 14 21:30:30 2008 -0400
|
|    ftrace: use the new kbuild CFLAGS_REMOVE for kernel directory

it talks about a cleanup but also adds -mno-spe removal that wasnt there 
before. This seems to be a powerpc special and the exact context is not 
clear to me.

	Ingo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [git pull] tracing fixes
  2008-07-18  2:52 ` Steven Rostedt
@ 2008-07-18  3:02   ` Steven Rostedt
  2008-07-18  8:41   ` Ingo Molnar
  1 sibling, 0 replies; 53+ messages in thread
From: Steven Rostedt @ 2008-07-18  3:02 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, linux-kernel, Thomas Gleixner


On Thu, 17 Jul 2008, Steven Rostedt wrote:
>
> also, is the '-mno-spe' save when ftrace is not configured?

s/save/safe/

-- Steve


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [git pull] tracing fixes
  2008-07-17 17:32 Ingo Molnar
@ 2008-07-18  2:52 ` Steven Rostedt
  2008-07-18  3:02   ` Steven Rostedt
  2008-07-18  8:41   ` Ingo Molnar
  0 siblings, 2 replies; 53+ messages in thread
From: Steven Rostedt @ 2008-07-18  2:52 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, linux-kernel, Thomas Gleixner


On Thu, 17 Jul 2008, Ingo Molnar wrote:
>
> Ingo Molnar (4):
>       ftrace: fix merge buglet
>       ftrace: fix lockup with MAXSMP
>       ftrace: do not trace scheduler functions
>       ftrace: do not trace library functions
>

[...]
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -11,8 +11,6 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
>  	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
>  	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o
>
> -CFLAGS_REMOVE_sched.o = -mno-spe
> -
>  ifdef CONFIG_FTRACE
>  # Do not trace debug files and internal ftrace files
>  CFLAGS_REMOVE_lockdep.o = -pg
> @@ -21,6 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
>  CFLAGS_REMOVE_rtmutex-debug.o = -pg
>  CFLAGS_REMOVE_cgroup-debug.o = -pg
>  CFLAGS_REMOVE_sched_clock.o = -pg
> +CFLAGS_REMOVE_sched.o = -mno-spe -pg
>  endif
>

Ingo,

Why not trace the scheduler functions? I found a lot of useful information
from seeing what functions are being called (namely the latencies caused
by the fair scheduler balancing). Not being able to trace sched.c seems to
keep a lot of useful data from being accessed.

also, is the '-mno-spe' save when ftrace is not configured?

-- Steve


^ permalink raw reply	[flat|nested] 53+ messages in thread

* [git pull] tracing fixes
@ 2008-07-17 17:32 Ingo Molnar
  2008-07-18  2:52 ` Steven Rostedt
  0 siblings, 1 reply; 53+ messages in thread
From: Ingo Molnar @ 2008-07-17 17:32 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Thomas Gleixner, Steven Rostedt

Linus,

Please pull the latest tracing fixes git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing-fixes-for-linus

Thanks,

	Ingo

------------------>
Ingo Molnar (4):
      ftrace: fix merge buglet
      ftrace: fix lockup with MAXSMP
      ftrace: do not trace scheduler functions
      ftrace: do not trace library functions


 arch/x86/kernel/Makefile |    3 +--
 kernel/Makefile          |    3 +--
 lib/Makefile             |   14 +++++---------
 3 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5112c84..da14061 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -8,8 +8,7 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
 ifdef CONFIG_FTRACE
 # Do not profile debug utilities
-CFLAGS_REMOVE_tsc_64.o = -pg
-CFLAGS_REMOVE_tsc_32.o = -pg
+CFLAGS_REMOVE_tsc.o = -pg
 CFLAGS_REMOVE_rtc.o = -pg
 endif
 
diff --git a/kernel/Makefile b/kernel/Makefile
index 0a7ed83..985ddb7 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,8 +11,6 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o
 
-CFLAGS_REMOVE_sched.o = -mno-spe
-
 ifdef CONFIG_FTRACE
 # Do not trace debug files and internal ftrace files
 CFLAGS_REMOVE_lockdep.o = -pg
@@ -21,6 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
 CFLAGS_REMOVE_rtmutex-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
+CFLAGS_REMOVE_sched.o = -mno-spe -pg
 endif
 
 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
diff --git a/lib/Makefile b/lib/Makefile
index 2c62a9c..818c4d4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -2,21 +2,17 @@
 # Makefile for some libs needed in the kernel.
 #
 
+ifdef CONFIG_FTRACE
+ORIG_CFLAGS := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
+endif
+
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o \
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o prio_heap.o ratelimit.o
 
-ifdef CONFIG_FTRACE
-# Do not profile string.o, since it may be used in early boot or vdso
-CFLAGS_REMOVE_string.o = -pg
-# Also do not profile any debug utilities
-CFLAGS_REMOVE_spinlock_debug.o = -pg
-CFLAGS_REMOVE_list_debug.o = -pg
-CFLAGS_REMOVE_debugobjects.o = -pg
-endif
-
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 

^ permalink raw reply related	[flat|nested] 53+ messages in thread

end of thread, other threads:[~2010-04-04 10:09 UTC | newest]

Thread overview: 53+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-01-16 16:57 [GIT PULL] tracing fixes Ingo Molnar
  -- strict thread matches above, loose matches on Subject: below --
2010-04-04 10:09 Ingo Molnar
2010-03-13 16:33 Ingo Molnar
2010-02-14  9:09 Ingo Molnar
2010-02-15  1:40 ` Masami Hiramatsu
2010-02-15  4:50   ` Ingo Molnar
2010-02-15 16:57     ` Masami Hiramatsu
2010-01-31 17:23 Ingo Molnar
2009-12-31 11:55 Ingo Molnar
2009-12-15 20:31 Ingo Molnar
2009-12-10 19:40 Ingo Molnar
2009-11-04 15:49 Ingo Molnar
2009-11-01 15:26 Ingo Molnar
2009-10-13 18:17 Ingo Molnar
2009-10-08 18:52 Ingo Molnar
2009-10-02 12:37 Ingo Molnar
2009-09-26 12:23 Ingo Molnar
2009-09-21 13:02 Ingo Molnar
2009-09-21 16:08 ` Linus Torvalds
2009-09-21 16:22   ` Ingo Molnar
2009-09-26 16:10     ` Christoph Hellwig
2009-10-01 19:02       ` Ingo Molnar
2009-08-25 18:02 Ingo Molnar
2009-08-09 16:08 Ingo Molnar
2009-08-04 19:01 Ingo Molnar
2009-07-10 16:25 Ingo Molnar
2009-06-26 18:56 Ingo Molnar
2009-06-20 16:53 Ingo Molnar
2009-05-18 14:29 Ingo Molnar
2009-05-05  9:31 Ingo Molnar
2009-04-17  1:01 Ingo Molnar
2009-04-13 17:32 Ingo Molnar
2009-04-09 15:45 Ingo Molnar
2009-04-07 19:23 Ingo Molnar
2009-02-19 17:08 [git pull] " Ingo Molnar
2009-02-17 16:37 Ingo Molnar
2009-02-11 14:25 Ingo Molnar
2009-02-04 19:08 Ingo Molnar
2009-01-30 23:02 Ingo Molnar
2009-01-11 14:47 Ingo Molnar
2008-11-29 19:34 Ingo Molnar
2008-11-20 11:26 Ingo Molnar
2008-11-18 14:46 Ingo Molnar
2008-11-11 18:24 Ingo Molnar
2008-11-03 18:03 Ingo Molnar
2008-08-28 13:31 Ingo Molnar
2008-07-26 19:52 Ingo Molnar
2008-07-17 17:32 Ingo Molnar
2008-07-18  2:52 ` Steven Rostedt
2008-07-18  3:02   ` Steven Rostedt
2008-07-18  8:41   ` Ingo Molnar
2008-07-18 10:35     ` Ingo Molnar
2008-07-19  1:18       ` Steven Rostedt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).