linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "zhangwei(Jovi)" <jovi.zhangwei@huawei.com>
To: Steven Rostedt <rostedt@goodmis.org>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	Ingo Molnar <mingo@redhat.com>,
	LKML <linux-kernel@vger.kernel.org>
Cc: "zhangwei(Jovi)" <jovi.zhangwei@huawei.com>
Subject: [PATCH 03/14] tracing: expose event tracing infrastructure
Date: Wed, 27 Mar 2013 17:48:45 +0800	[thread overview]
Message-ID: <1364377737-10540-5-git-send-email-jovi.zhangwei@huawei.com> (raw)
In-Reply-To: <1364377737-10540-1-git-send-email-jovi.zhangwei@huawei.com>

From: "zhangwei(Jovi)" <jovi.zhangwei@huawei.com>

Currently, event tracing can only be used by ftrace and perf;
there is no mechanism for modules (such as external tracing tools)
to register callback tracing functions.

Event tracing is implemented on top of tracepoints. Compared with raw
tracepoints, the event tracing infrastructure provides a built-in structured
event annotation format; this feature should be exposed to external users.

For example, the following simple pseudo ktap script demonstrates how to use
this newly exposed event tracing infrastructure.

function event_trace(e)
{
        printf("%s", e.annotate);
}

os.trace("sched:sched_switch", event_trace);
os.trace("irq:softirq_raise", event_trace);

The running result:
sched_switch: prev_comm=rcu_sched prev_pid=10 prev_prio=120 prev_state=S ==> next_comm=swapper/1 next_pid=0 next_prio=120
softirq_raise: vec=1 [action=TIMER]
...

This change could also be used by other tracing tools, like systemtap/lttng,
if they choose to implement support for it.

This patch introduces struct event_trace_ops in trace_array; it has
two callback functions, pre_trace and do_trace.
When the ftrace_raw_event_<call> function is hit, it calls all
registered event_trace_ops.

An additional benefit of this change is that the kernel size shrinks by ~18K.

(The kernel size will shrink further when the perf tracing code is
converted to use this mechanism in the future.)

text    data     bss     dec     hex filename
7402131  804364 3149824 11356319         ad489f vmlinux.old
7383115  804684 3149824 11337623         acff97 vmlinux.new

Signed-off-by: zhangwei(Jovi) <jovi.zhangwei@huawei.com>
---
 include/linux/ftrace_event.h |   26 ++++++++++++++++
 include/linux/trace_array.h  |    1 +
 include/trace/ftrace.h       |   69 +++++++++++++-----------------------------
 kernel/trace/trace.c         |    4 ++-
 kernel/trace/trace.h         |    2 ++
 kernel/trace/trace_events.c  |   53 ++++++++++++++++++++++++++++++++
 6 files changed, 106 insertions(+), 49 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4e28b01..27d7a4f 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -6,6 +6,7 @@
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 #include <linux/perf_event.h>
+#include <linux/trace_array.h>
 
 struct trace_array;
 struct trace_buffer;
@@ -245,6 +246,31 @@ struct ftrace_event_call {
 #endif
 };
 
+struct ftrace_trace_descriptor_t {
+	struct ring_buffer_event *event;
+	struct ring_buffer *buffer;
+	unsigned long irq_flags;
+	int pc;
+};
+
+/*
+ * trace_descriptor_t is purpose for passing arguments between
+ * pre_trace and do_trace function.
+ * this definition is ugly, change it in future.
+ */
+struct trace_descriptor_t {
+	struct ftrace_trace_descriptor_t	f;
+	void  *data;
+};
+
+/* callback function for tracing */
+struct event_trace_ops {
+	void *(*pre_trace)(struct ftrace_event_file *file,
+			   int entry_size, void *data);
+	void (*do_trace)(struct ftrace_event_file *file, void *entry,
+			 int entry_size, void *data);
+};
+
 struct trace_array;
 struct ftrace_subsystem_dir;
 
diff --git a/include/linux/trace_array.h b/include/linux/trace_array.h
index c5b7a13..b362c5f 100644
--- a/include/linux/trace_array.h
+++ b/include/linux/trace_array.h
@@ -56,6 +56,7 @@ struct trace_array {
 	struct list_head	list;
 	char			*name;
 	struct trace_buffer	trace_buffer;
+	struct event_trace_ops	*ops;
 #ifdef CONFIG_TRACER_MAX_TRACE
 	/*
 	 * The max_buffer is used to snapshot the trace when a maximum
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 4bda044..e4ea38c 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -401,41 +401,28 @@ static inline notrace int ftrace_get_offsets_##call(			\
  *
  * static struct ftrace_event_call event_<call>;
  *
- * static void ftrace_raw_event_<call>(void *__data, proto)
+ * static notrace void ftrace_raw_event_##call(void *__data, proto)
  * {
  *	struct ftrace_event_file *ftrace_file = __data;
- *	struct ftrace_event_call *event_call = ftrace_file->event_call;
- *	struct ftrace_data_offsets_<call> __maybe_unused __data_offsets;
- *	struct ring_buffer_event *event;
- *	struct ftrace_raw_<call> *entry; <-- defined in stage 1
- *	struct ring_buffer *buffer;
- *	unsigned long irq_flags;
- *	int __data_size;
- *	int pc;
+ *	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;
+ *	struct trace_descriptor_t __desc;
+ *	struct event_trace_ops *ops = ftrace_file->tr->ops;
+ *	struct ftrace_raw_##call *entry; <-- defined in stage 1
+ *	int __data_size, __entry_size;
  *
- *	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT,
- *		     &ftrace_file->flags))
- *		return;
- *
- *	local_save_flags(irq_flags);
- *	pc = preempt_count();
- *
- *	__data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
+ *	__data_size = ftrace_get_offsets_##call(&__data_offsets, args);
+ *	__entry_size = sizeof(*entry) + __data_size;
  *
- *	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
- *				  event_<call>->event.type,
- *				  sizeof(*entry) + __data_size,
- *				  irq_flags, pc);
- *	if (!event)
+ *	entry = ops->pre_trace(ftrace_file, __entry_size, &__desc);
+ *	if (!entry)
  *		return;
- *	entry	= ring_buffer_event_data(event);
+ *
+ *	tstruct
  *
  *	{ <assign>; }  <-- Here we assign the entries by the __field and
  *			   __array macros.
  *
- *	if (!filter_current_check_discard(buffer, event_call, entry, event))
- *		trace_nowake_buffer_unlock_commit(buffer,
- *						   event, irq_flags, pc);
+ *	ops->do_trace(ftrace_file, entry, __entry_size, &__desc);
  * }
  *
  * static struct trace_event ftrace_event_type_<call> = {
@@ -513,38 +500,24 @@ static notrace void							\
 ftrace_raw_event_##call(void *__data, proto)				\
 {									\
 	struct ftrace_event_file *ftrace_file = __data;			\
-	struct ftrace_event_call *event_call = ftrace_file->event_call;	\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
-	struct ring_buffer_event *event;				\
+	struct trace_descriptor_t __desc;				\
+	struct event_trace_ops *ops = ftrace_file->tr->ops;		\
 	struct ftrace_raw_##call *entry;				\
-	struct ring_buffer *buffer;					\
-	unsigned long irq_flags;					\
-	int __data_size;						\
-	int pc;								\
+	int __data_size, __entry_size;					\
 									\
-	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT,			\
-		     &ftrace_file->flags))				\
-		return;							\
-									\
-	local_save_flags(irq_flags);					\
-	pc = preempt_count();						\
+ 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
+	__entry_size = sizeof(*entry) + __data_size;			\
 									\
-	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
-									\
-	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,	\
-				 event_call->event.type,		\
-				 sizeof(*entry) + __data_size,		\
-				 irq_flags, pc);			\
-	if (!event)							\
+	entry = ops->pre_trace(ftrace_file, __entry_size, &__desc);	\
+	if (!entry)							\
 		return;							\
-	entry	= ring_buffer_event_data(event);			\
 									\
 	tstruct								\
 									\
 	{ assign; }							\
 									\
-	if (!filter_current_check_discard(buffer, event_call, entry, event)) \
-		trace_buffer_unlock_commit(buffer, event, irq_flags, pc); \
+	ops->do_trace(ftrace_file, entry, __entry_size, &__desc);	\
 }
 /*
  * The ftrace_test_probe is compiled out, it is only here as a build time check
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 829b2be..224b152 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -189,7 +189,7 @@ unsigned long long ns2usecs(cycle_t nsec)
  * pages for the buffer for that CPU. Each CPU has the same number
  * of pages allocated for its buffer.
  */
-static struct trace_array	global_trace;
+static struct trace_array	global_trace = {.ops = &ftrace_events_ops};
 
 LIST_HEAD(ftrace_trace_arrays);
 
@@ -5773,6 +5773,8 @@ static int new_instance_create(const char *name)
 
 	list_add(&tr->list, &ftrace_trace_arrays);
 
+	tr->ops = &ftrace_events_ops;
+
 	mutex_unlock(&trace_types_lock);
 
 	return 0;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index a8acfcd..0a1f4be 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -493,6 +493,8 @@ extern unsigned long nsecs_to_usecs(unsigned long nsecs);
 
 extern unsigned long tracing_thresh;
 
+extern struct event_trace_ops ftrace_events_ops;
+
 #ifdef CONFIG_TRACER_MAX_TRACE
 extern unsigned long tracing_max_latency;
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 53582e9..23f4cfc 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -241,6 +241,59 @@ void trace_event_enable_cmd_record(bool enable)
 	mutex_unlock(&event_mutex);
 }
 
+static void *ftrace_events_pre_trace(struct ftrace_event_file *file,
+				     int entry_size, void *data)
+{
+	struct ftrace_event_call *event_call = file->event_call;
+	struct ftrace_trace_descriptor_t *desc = &((struct trace_descriptor_t *)
+						 data)->f;
+	struct ring_buffer_event *event;
+	struct ring_buffer *buffer;
+	unsigned long irq_flags;
+	int pc;
+
+	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags))
+		return NULL;
+
+	local_save_flags(irq_flags);
+	pc = preempt_count();
+
+	event = trace_event_buffer_lock_reserve(&buffer, file,
+						event_call->event.type,
+						entry_size, irq_flags, pc);
+
+	if (!event)
+		return NULL;
+
+	desc->event = event;
+	desc->buffer = buffer;
+	desc->irq_flags = irq_flags;
+	desc->pc = pc;
+
+	return ring_buffer_event_data(event);
+}
+
+static void ftrace_events_do_trace(struct ftrace_event_file *file, void *entry,
+				   int entry_size, void *data)
+{
+	struct ftrace_event_call *event_call = file->event_call;
+	struct ftrace_trace_descriptor_t *desc = &((struct trace_descriptor_t *)
+						 data)->f;
+	struct ring_buffer_event *event = desc->event;
+	struct ring_buffer *buffer = desc->buffer;
+	unsigned long irq_flags = desc->irq_flags;
+	int pc = desc->pc;
+
+	if (!filter_current_check_discard(buffer, event_call, entry, event))
+		trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
+}
+
+struct event_trace_ops ftrace_events_ops = {
+	.pre_trace = ftrace_events_pre_trace,
+	.do_trace  = ftrace_events_do_trace,
+};
+
+
 static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
 					 int enable, int soft_disable)
 {
-- 
1.7.9.7



  parent reply	other threads:[~2013-03-27  9:51 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-27  9:48 [PATCH 00/14] event tracing expose change and bugfix/cleanup zhangwei(Jovi)
2013-03-27  9:48 ` zhangwei(Jovi)
2013-03-27  9:48 ` [PATCH 01/14] tracing: move trace_array definition into include/linux/trace_array.h zhangwei(Jovi)
2013-03-27  9:48 ` [PATCH 02/14] tracing: fix irqs-off tag display in syscall tracing zhangwei(Jovi)
2013-03-27  9:48 ` zhangwei(Jovi) [this message]
2013-03-27  9:48 ` [PATCH 04/14] tracing: add private data field into struct ftrace_event_file zhangwei(Jovi)
2013-03-27  9:48 ` [PATCH 05/14] tracing: switch syscall tracing to use event_trace_ops backend zhangwei(Jovi)
2013-03-27  9:48 ` [PATCH 06/14] tracing: export syscall metadata zhangwei(Jovi)
2013-03-27  9:48 ` [PATCH 07/14] tracing: expose structure ftrace_event_field zhangwei(Jovi)
2013-03-27  9:48 ` [PATCH 08/14] tracing: remove TRACE_EVENT_TYPE enum definition zhangwei(Jovi)
2013-03-27  9:48 ` [PATCH 09/14] tracing: remove obsolete macro guard _TRACE_PROFILE_INIT zhangwei(Jovi)
2013-03-27  9:48 ` [PATCH 10/14] tracing: remove ftrace(...) function zhangwei(Jovi)
2013-03-27  9:48 ` [PATCH 11/14] tracing: use per trace_array clock_id instead of global trace_clock_id zhangwei(Jovi)
2013-03-27  9:48 ` [PATCH 12/14] tracing: guard tracing_selftest_disabled by CONFIG_FTRACE_STARTUP_TEST zhangwei(Jovi)
2013-03-27  9:48 ` [PATCH 13/14] libtraceevent: add libtraceevent prefix in warning message zhangwei(Jovi)
2013-03-27  9:48 ` [PATCH 14/14] tracing: fix regression of perf function tracing zhangwei(Jovi)
2013-03-27  9:48 ` zhangwei(Jovi)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1364377737-10540-5-git-send-email-jovi.zhangwei@huawei.com \
    --to=jovi.zhangwei@huawei.com \
    --cc=fweisbec@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).