linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC patch 00/12] Tracepoints v2
@ 2008-07-04 23:52 Mathieu Desnoyers
  2008-07-04 23:52 ` [RFC patch 01/12] Kernel Tracepoints Mathieu Desnoyers
                   ` (12 more replies)
  0 siblings, 13 replies; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-04 23:52 UTC (permalink / raw)
  To: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt

Hi,

Here is the second release of kernel tracepoints, including the architecture
independent instrumentation taken from LTTng. I submit this for another round of
comments.

The patchset applies on 2.6.26-rc8 in this order :

# Instrumentation
tracepoints.patch
lttng-instrumentation-fs.patch
lttng-instrumentation-ipc.patch
lttng-instrumentation-kernel.patch
lttng-instrumentation-mm.patch
lttng-instrumentation-net.patch

# LTTng probes, using markers
traceprobes.patch
lttng-instrumentation-fs-tracepoints-probes.patch
lttng-instrumentation-ipc-tracepoints-probes.patch
lttng-instrumentation-kernel-tracepoints-probes.patch
lttng-instrumentation-mm-tracepoints-probes.patch
lttng-instrumentation-net-tracepoints-probes.patch

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [RFC patch 01/12] Kernel Tracepoints
  2008-07-04 23:52 [RFC patch 00/12] Tracepoints v2 Mathieu Desnoyers
@ 2008-07-04 23:52 ` Mathieu Desnoyers
  2008-07-07 16:27   ` Masami Hiramatsu
  2008-07-04 23:52 ` [RFC patch 02/12] LTTng tracepoint instrumentation fs Mathieu Desnoyers
                   ` (11 subsequent siblings)
  12 siblings, 1 reply; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-04 23:52 UTC (permalink / raw)
  To: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt
  Cc: Mathieu Desnoyers, Hideo AOKI, Takashi Nishiie, Alexander Viro

[-- Attachment #1: tracepoints.patch --]
[-- Type: text/plain, Size: 25191 bytes --]

Implementation of kernel tracepoints. Inspired by the Linux Kernel Markers.

Allows complete typing verification. No format string required.

TODO : Documentation/tracepoint.txt

Changelog :
- Use #name ":" #proto as string to identify the tracepoint in the
  tracepoint table. This will make sure no type mismatch happens due to
  connection of a probe with the wrong type to a tracepoint declared with
  the same name in a different header.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: 'Peter Zijlstra' <peterz@infradead.org>
CC: "Frank Ch. Eigler" <fche@redhat.com>
CC: 'Ingo Molnar' <mingo@elte.hu>
CC: 'Hideo AOKI' <haoki@redhat.com>
CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
CC: 'Steven Rostedt' <rostedt@goodmis.org>
CC: Alexander Viro <viro@zeniv.linux.org.uk>
---
 include/asm-generic/vmlinux.lds.h |    6 
 include/linux/module.h            |   17 +
 include/linux/tracepoint.h        |  123 +++++++++
 init/Kconfig                      |    7 
 kernel/Makefile                   |    1 
 kernel/module.c                   |   66 +++++
 kernel/tracepoint.c               |  474 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 692 insertions(+), 2 deletions(-)

Index: linux-2.6-lttng/init/Kconfig
===================================================================
--- linux-2.6-lttng.orig/init/Kconfig	2008-07-04 10:48:08.000000000 -0400
+++ linux-2.6-lttng/init/Kconfig	2008-07-04 11:12:22.000000000 -0400
@@ -782,6 +782,13 @@ config PROFILING
 	  Say Y here to enable the extended profiling support mechanisms used
 	  by profilers such as OProfile.
 
+config TRACEPOINTS
+	bool "Activate tracepoints"
+	default y
+	help
+	  Place an empty function call at each tracepoint site. Can be
+	  dynamically changed for a probe function.
+
 config MARKERS
 	bool "Activate markers"
 	help
Index: linux-2.6-lttng/kernel/Makefile
===================================================================
--- linux-2.6-lttng.orig/kernel/Makefile	2008-07-04 10:48:08.000000000 -0400
+++ linux-2.6-lttng/kernel/Makefile	2008-07-04 11:10:41.000000000 -0400
@@ -68,6 +68,7 @@ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 obj-$(CONFIG_MARKERS) += marker.o
+obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
Index: linux-2.6-lttng/include/linux/tracepoint.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/linux/tracepoint.h	2008-07-04 11:10:39.000000000 -0400
@@ -0,0 +1,123 @@
+#ifndef _LINUX_TRACEPOINT_H
+#define _LINUX_TRACEPOINT_H
+
+/*
+ * Kernel Tracepoint API.
+ *
+ * See Documentation/tracepoint.txt.
+ *
+ * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * Heavily inspired from the Linux Kernel Markers.
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+
+struct module;
+struct tracepoint;
+
+struct tracepoint {
+	const char *name;		/* Tracepoint name */
+	int state;			/* State. */
+	void **funcs;
+} __attribute__((aligned(8)));
+
+
+#define TPPROTO(args...)	args
+#define TPARGS(args...)		args
+
+#ifdef CONFIG_TRACEPOINTS
+
+#define __DO_TRACE(tp, proto, args)					\
+	do {								\
+		int i;							\
+		void **funcs;						\
+		preempt_disable();					\
+		funcs = (tp)->funcs;					\
+		smp_read_barrier_depends();				\
+		if (funcs) {						\
+			for (i = 0; funcs[i]; i++) {			\
+				((void(*)(proto))(funcs[i]))(args);	\
+			}						\
+		}							\
+		preempt_enable();					\
+	} while (0)
+
+/*
+ * Make sure the alignment of the structure in the __tracepoints section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ */
+#define DEFINE_TRACE(name, proto, args)					\
+	static inline void trace_##name(proto)				\
+	{								\
+		static const char __tpstrtab_##name[]			\
+		__attribute__((section("__tracepoints_strings")))	\
+		= #name ":" #proto;					\
+		static struct tracepoint __tracepoint_##name		\
+		__attribute__((section("__tracepoints"), aligned(8))) =	\
+		{ __tpstrtab_##name, 0, NULL };				\
+		if (unlikely(__tracepoint_##name.state))		\
+			__DO_TRACE(&__tracepoint_##name,		\
+				TPPROTO(proto), TPARGS(args));		\
+	}								\
+	static inline int register_trace_##name(void (*probe)(proto))	\
+	{								\
+		return tracepoint_probe_register(#name ":" #proto,	\
+			(void *)probe);					\
+	}								\
+	static inline void unregister_trace_##name(void (*probe)(proto))\
+	{								\
+		tracepoint_probe_unregister(#name ":" #proto,		\
+			(void *)probe);					\
+	}
+
+extern void tracepoint_update_probe_range(struct tracepoint *begin,
+	struct tracepoint *end);
+
+#else /* !CONFIG_TRACEPOINTS */
+#define DEFINE_TRACE(name, proto, args)			\
+	static inline void _do_trace_##name(struct tracepoint *tp, proto) \
+	{ }								\
+	static inline void trace_##name(proto)				\
+	{ }								\
+	static inline int register_trace_##name(void (*probe)(proto))	\
+	{								\
+		return -ENOSYS;						\
+	}								\
+	static inline void unregister_trace_##name(void (*probe)(proto))\
+	{ }
+
+static inline void tracepoint_update_probe_range(struct tracepoint *begin,
+	struct tracepoint *end)
+{ }
+#endif /* CONFIG_TRACEPOINTS */
+
+/*
+ * Connect a probe to a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_register(const char *name, void *probe);
+
+/*
+ * Disconnect a probe from a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_unregister(const char *name, void *probe);
+
+struct tracepoint_iter {
+	struct module *module;
+	struct tracepoint *tracepoint;
+};
+
+extern void tracepoint_iter_start(struct tracepoint_iter *iter);
+extern void tracepoint_iter_next(struct tracepoint_iter *iter);
+extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
+extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
+extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+	struct tracepoint *begin, struct tracepoint *end);
+
+#endif
Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h	2008-07-04 10:48:08.000000000 -0400
+++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h	2008-07-04 11:10:41.000000000 -0400
@@ -52,7 +52,10 @@
 	. = ALIGN(8);							\
 	VMLINUX_SYMBOL(__start___markers) = .;				\
 	*(__markers)							\
-	VMLINUX_SYMBOL(__stop___markers) = .;
+	VMLINUX_SYMBOL(__stop___markers) = .;				\
+	VMLINUX_SYMBOL(__start___tracepoints) = .;			\
+	*(__tracepoints)						\
+	VMLINUX_SYMBOL(__stop___tracepoints) = .;
 
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
@@ -61,6 +64,7 @@
 		*(.rodata) *(.rodata.*)					\
 		*(__vermagic)		/* Kernel version magic */	\
 		*(__markers_strings)	/* Markers: strings */		\
+		*(__tracepoints_strings)/* Tracepoints: strings */	\
 	}								\
 									\
 	.rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {		\
Index: linux-2.6-lttng/kernel/tracepoint.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/kernel/tracepoint.c	2008-07-04 11:10:39.000000000 -0400
@@ -0,0 +1,474 @@
+/*
+ * Copyright (C) 2008 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/tracepoint.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+extern struct tracepoint __start___tracepoints[];
+extern struct tracepoint __stop___tracepoints[];
+
+/* Set to 1 to enable tracepoint debug output */
+static const int tracepoint_debug;
+
+/*
+ * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
+ * builtin and module tracepoints and the hash table.
+ */
+static DEFINE_MUTEX(tracepoints_mutex);
+
+/*
+ * Tracepoint hash table, containing the active tracepoints.
+ * Protected by tracepoints_mutex.
+ */
+#define TRACEPOINT_HASH_BITS 6
+#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
+
+/*
+ * Note about RCU :
+ * It is used to delay the free of multiple probes array until a quiescent
+ * state is reached.
+ * Tracepoint entries modifications are protected by the tracepoints_mutex.
+ */
+struct tracepoint_entry {
+	struct hlist_node hlist;
+	void **funcs;
+	int refcount;	/* Number of times armed. 0 if disarmed. */
+	struct rcu_head rcu;
+	void *oldptr;
+	unsigned char rcu_pending:1;
+	char name[0];
+};
+
+static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
+
+static void free_old_closure(struct rcu_head *head)
+{
+	struct tracepoint_entry *entry = container_of(head,
+		struct tracepoint_entry, rcu);
+	kfree(entry->oldptr);
+	/* Make sure we free the data before setting the pending flag to 0 */
+	smp_wmb();
+	entry->rcu_pending = 0;
+}
+
+static void debug_print_probes(struct tracepoint_entry *entry)
+{
+	int i;
+
+	if (!tracepoint_debug)
+		return;
+
+	for (i = 0; entry->funcs[i]; i++)
+		printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
+}
+
+static void *
+tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
+{
+	int nr_probes = 0;
+	void **old, **new;
+
+	WARN_ON(!probe);
+
+	debug_print_probes(entry);
+	old = entry->funcs;
+	if (old) {
+		/* (N -> N+1), (N != 0, 1) probes */
+		for (nr_probes = 0; old[nr_probes]; nr_probes++)
+			if (old[nr_probes] == probe)
+				return ERR_PTR(-EBUSY);
+	}
+	/* + 2 : one for new probe, one for NULL func */
+	new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
+	if (new == NULL)
+		return ERR_PTR(-ENOMEM);
+	if (old)
+		memcpy(new, old, nr_probes * sizeof(void *));
+	new[nr_probes] = probe;
+	entry->refcount = nr_probes + 1;
+	entry->funcs = new;
+	debug_print_probes(entry);
+	return old;
+}
+
+static void *
+tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
+{
+	int nr_probes = 0, nr_del = 0, i;
+	void **old, **new;
+
+	old = entry->funcs;
+
+	debug_print_probes(entry);
+	/* (N -> M), (N > 1, M >= 0) probes */
+	for (nr_probes = 0; old[nr_probes]; nr_probes++) {
+		if ((!probe || old[nr_probes] == probe))
+			nr_del++;
+	}
+
+	if (nr_probes - nr_del == 0) {
+		/* N -> 0, (N > 1) */
+		entry->funcs = NULL;
+		entry->refcount = 0;
+		debug_print_probes(entry);
+		return old;
+	} else {
+		int j = 0;
+		/* N -> M, (N > 1, M > 0) */
+		/* + 1 for NULL */
+		new = kzalloc((nr_probes - nr_del + 1)
+			* sizeof(void *), GFP_KERNEL);
+		if (new == NULL)
+			return ERR_PTR(-ENOMEM);
+		for (i = 0; old[i]; i++)
+			if ((probe && old[i] != probe))
+				new[j++] = old[i];
+		entry->refcount = nr_probes - nr_del;
+		entry->funcs = new;
+	}
+	debug_print_probes(entry);
+	return old;
+}
+
+/*
+ * Get tracepoint if the tracepoint is present in the tracepoint hash table.
+ * Must be called with tracepoints_mutex held.
+ * Returns NULL if not present.
+ */
+static struct tracepoint_entry *get_tracepoint(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct tracepoint_entry *e;
+	u32 hash = jhash(name, strlen(name), 0);
+
+	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name))
+			return e;
+	}
+	return NULL;
+}
+
+/*
+ * Add the tracepoint to the tracepoint hash table. Must be called with
+ * tracepoints_mutex held.
+ */
+static struct tracepoint_entry *add_tracepoint(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct tracepoint_entry *e;
+	size_t name_len = strlen(name) + 1;
+	u32 hash = jhash(name, name_len-1, 0);
+
+	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			printk(KERN_NOTICE
+				"tracepoint %s busy\n", name);
+			return ERR_PTR(-EBUSY);	/* Already there */
+		}
+	}
+	/*
+	 * Using kmalloc here to allocate a variable length element. Could
+	 * cause some memory fragmentation if overused.
+	 */
+	e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
+	if (!e)
+		return ERR_PTR(-ENOMEM);
+	memcpy(&e->name[0], name, name_len);
+	e->funcs = NULL;
+	e->refcount = 0;
+	e->rcu_pending = 0;
+	hlist_add_head(&e->hlist, head);
+	return e;
+}
+
+/*
+ * Remove the tracepoint from the tracepoint hash table. Must be called with
+ * tracepoints_mutex held.
+ */
+static int remove_tracepoint(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct tracepoint_entry *e;
+	int found = 0;
+	size_t len = strlen(name) + 1;
+	u32 hash = jhash(name, len-1, 0);
+
+	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			break;
+		}
+	}
+	if (!found)
+		return -ENOENT;
+	if (e->refcount)
+		return -EBUSY;
+	hlist_del(&e->hlist);
+	/* Make sure the call_rcu has been executed */
+	if (e->rcu_pending)
+		rcu_barrier();
+	kfree(e);
+	return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one tracepoint.
+ */
+static void set_tracepoint(struct tracepoint_entry **entry,
+	struct tracepoint *elem, int active)
+{
+	WARN_ON(strcmp((*entry)->name, elem->name) != 0);
+
+	smp_wmb();
+	/*
+	 * We also make sure that the new probe callbacks array is consistent
+	 * before setting a pointer to it.
+	 */
+	rcu_assign_pointer(elem->funcs, (*entry)->funcs);
+	elem->state = active;
+}
+
+/*
+ * Disable a tracepoint and its probe callback.
+ * Note: only waiting an RCU period after setting elem->call to the empty
+ * function ensures that the original callback is not used anymore. This is
+ * ensured by preempt_disable around the call site.
+ */
+static void disable_tracepoint(struct tracepoint *elem)
+{
+	elem->state = 0;
+}
+
+/**
+ * tracepoint_update_probe_range - Update a probe range
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Updates the probe callback corresponding to a range of tracepoints.
+ */
+void tracepoint_update_probe_range(struct tracepoint *begin,
+	struct tracepoint *end)
+{
+	struct tracepoint *iter;
+	struct tracepoint_entry *mark_entry;
+
+	mutex_lock(&tracepoints_mutex);
+	for (iter = begin; iter < end; iter++) {
+		mark_entry = get_tracepoint(iter->name);
+		if (mark_entry) {
+			set_tracepoint(&mark_entry, iter,
+					!!mark_entry->refcount);
+		} else {
+			disable_tracepoint(iter);
+		}
+	}
+	mutex_unlock(&tracepoints_mutex);
+}
+
+/*
+ * Update probes, removing the faulty probes.
+ */
+static void tracepoint_update_probes(void)
+{
+	/* Core kernel tracepoints */
+	tracepoint_update_probe_range(__start___tracepoints,
+		__stop___tracepoints);
+	/* tracepoints in modules. */
+	module_update_tracepoints();
+}
+
+/**
+ * tracepoint_probe_register -  Connect a probe to a tracepoint
+ * @name: tracepoint name
+ * @probe: probe handler
+ *
+ * Returns 0 if ok, error value on error.
+ * The probe address must at least be aligned on the architecture pointer size.
+ */
+int tracepoint_probe_register(const char *name, void *probe)
+{
+	struct tracepoint_entry *entry;
+	int ret = 0;
+	void *old;
+
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	if (!entry) {
+		entry = add_tracepoint(name);
+		if (IS_ERR(entry)) {
+			ret = PTR_ERR(entry);
+			goto end;
+		}
+	}
+	/*
+	 * If we detect that a call_rcu is pending for this tracepoint,
+	 * make sure it's executed now.
+	 */
+	if (entry->rcu_pending)
+		rcu_barrier();
+	old = tracepoint_entry_add_probe(entry, probe);
+	if (IS_ERR(old)) {
+		ret = PTR_ERR(old);
+		goto end;
+	}
+	mutex_unlock(&tracepoints_mutex);
+	tracepoint_update_probes();		/* may update entry */
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	WARN_ON(!entry);
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+#ifdef CONFIG_PREEMPT_RCU
+	synchronize_sched();	/* Until we have the call_rcu_sched() */
+#endif
+	call_rcu(&entry->rcu, free_old_closure);
+end:
+	mutex_unlock(&tracepoints_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_register);
+
+/**
+ * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
+ * @name: tracepoint name
+ * @probe: probe function pointer
+ *
+ * We do not need to call a synchronize_sched to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which ensures that every preempt disabled section
+ * has finished.
+ */
+int tracepoint_probe_unregister(const char *name, void *probe)
+{
+	struct tracepoint_entry *entry;
+	void *old;
+	int ret = -ENOENT;
+
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	if (!entry)
+		goto end;
+	if (entry->rcu_pending)
+		rcu_barrier();
+	old = tracepoint_entry_remove_probe(entry, probe);
+	mutex_unlock(&tracepoints_mutex);
+	tracepoint_update_probes();		/* may update entry */
+	mutex_lock(&tracepoints_mutex);
+	entry = get_tracepoint(name);
+	if (!entry)
+		goto end;
+	entry->oldptr = old;
+	entry->rcu_pending = 1;
+	/* write rcu_pending before calling the RCU callback */
+	smp_wmb();
+#ifdef CONFIG_PREEMPT_RCU
+	synchronize_sched();	/* Until we have the call_rcu_sched() */
+#endif
+	call_rcu(&entry->rcu, free_old_closure);
+	remove_tracepoint(name);	/* Ignore busy error message */
+	ret = 0;
+end:
+	mutex_unlock(&tracepoints_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+
+/**
+ * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
+ * @tracepoint: current tracepoints (in), next tracepoint (out)
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Returns whether a next tracepoint has been found (1) or not (0).
+ * Will return the first tracepoint in the range if the input tracepoint is
+ * NULL.
+ */
+int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+	struct tracepoint *begin, struct tracepoint *end)
+{
+	if (!*tracepoint && begin != end) {
+		*tracepoint = begin;
+		return 1;
+	}
+	if (*tracepoint >= begin && *tracepoint < end)
+		return 1;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
+
+static void tracepoint_get_iter(struct tracepoint_iter *iter)
+{
+	int found = 0;
+
+	/* Core kernel tracepoints */
+	if (!iter->module) {
+		found = tracepoint_get_iter_range(&iter->tracepoint,
+				__start___tracepoints, __stop___tracepoints);
+		if (found)
+			goto end;
+	}
+	/* tracepoints in modules. */
+	found = module_get_iter_tracepoints(iter);
+end:
+	if (!found)
+		tracepoint_iter_reset(iter);
+}
+
+void tracepoint_iter_start(struct tracepoint_iter *iter)
+{
+	tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_start);
+
+void tracepoint_iter_next(struct tracepoint_iter *iter)
+{
+	iter->tracepoint++;
+	/*
+	 * iter->tracepoint may be invalid because we blindly incremented it.
+	 * Make sure it is valid by marshalling on the tracepoints, getting the
+	 * tracepoints from following modules if necessary.
+	 */
+	tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_next);
+
+void tracepoint_iter_stop(struct tracepoint_iter *iter)
+{
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
+
+void tracepoint_iter_reset(struct tracepoint_iter *iter)
+{
+	iter->module = NULL;
+	iter->tracepoint = NULL;
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
Index: linux-2.6-lttng/kernel/module.c
===================================================================
--- linux-2.6-lttng.orig/kernel/module.c	2008-07-04 10:48:08.000000000 -0400
+++ linux-2.6-lttng/kernel/module.c	2008-07-04 11:12:39.000000000 -0400
@@ -46,6 +46,7 @@
 #include <asm/cacheflush.h>
 #include <linux/license.h>
 #include <asm/sections.h>
+#include <linux/tracepoint.h>
 
 #if 0
 #define DEBUGP printk
@@ -1770,6 +1771,8 @@ static struct module *load_module(void _
 	unsigned int unusedgplcrcindex;
 	unsigned int markersindex;
 	unsigned int markersstringsindex;
+	unsigned int tracepointsindex;
+	unsigned int tracepointsstringsindex;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -2049,6 +2052,9 @@ static struct module *load_module(void _
 	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
  	markersstringsindex = find_sec(hdr, sechdrs, secstrings,
 					"__markers_strings");
+	tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
+	tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
+					"__tracepoints_strings");
 
 	/* Now do relocations. */
 	for (i = 1; i < hdr->e_shnum; i++) {
@@ -2076,6 +2082,12 @@ static struct module *load_module(void _
 	mod->num_markers =
 		sechdrs[markersindex].sh_size / sizeof(*mod->markers);
 #endif
+#ifdef CONFIG_TRACEPOINTS
+	mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
+	mod->num_tracepoints =
+		sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
+#endif
+
 
         /* Find duplicate symbols */
 	err = verify_export_symbols(mod);
@@ -2094,11 +2106,16 @@ static struct module *load_module(void _
 
 	add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
 
+	if (!mod->taints) {
 #ifdef CONFIG_MARKERS
-	if (!mod->taints)
 		marker_update_probe_range(mod->markers,
 			mod->markers + mod->num_markers);
 #endif
+#ifdef CONFIG_TRACEPOINTS
+		tracepoint_update_probe_range(mod->tracepoints,
+			mod->tracepoints + mod->num_tracepoints);
+#endif
+	}
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
@@ -2646,3 +2663,50 @@ void module_update_markers(void)
 	mutex_unlock(&module_mutex);
 }
 #endif
+
+#ifdef CONFIG_TRACEPOINTS
+void module_update_tracepoints(void)
+{
+	struct module *mod;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(mod, &modules, list)
+		if (!mod->taints)
+			tracepoint_update_probe_range(mod->tracepoints,
+				mod->tracepoints + mod->num_tracepoints);
+	mutex_unlock(&module_mutex);
+}
+
+/*
+ * Returns 0 if current not found.
+ * Returns 1 if current found.
+ */
+int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+	struct module *iter_mod;
+	int found = 0;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(iter_mod, &modules, list) {
+		if (!iter_mod->taints) {
+			/*
+			 * Sorted module list
+			 */
+			if (iter_mod < iter->module)
+				continue;
+			else if (iter_mod > iter->module)
+				iter->tracepoint = NULL;
+			found = tracepoint_get_iter_range(&iter->tracepoint,
+				iter_mod->tracepoints,
+				iter_mod->tracepoints
+					+ iter_mod->num_tracepoints);
+			if (found) {
+				iter->module = iter_mod;
+				break;
+			}
+		}
+	}
+	mutex_unlock(&module_mutex);
+	return found;
+}
+#endif
Index: linux-2.6-lttng/include/linux/module.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/module.h	2008-07-04 10:48:08.000000000 -0400
+++ linux-2.6-lttng/include/linux/module.h	2008-07-04 11:10:41.000000000 -0400
@@ -16,6 +16,7 @@
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
 #include <linux/marker.h>
+#include <linux/tracepoint.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -342,6 +343,10 @@ struct module
 	struct marker *markers;
 	unsigned int num_markers;
 #endif
+#ifdef CONFIG_TRACEPOINTS
+	struct tracepoint *tracepoints;
+	unsigned int num_tracepoints;
+#endif
 };
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
@@ -450,6 +455,9 @@ extern void print_modules(void);
 
 extern void module_update_markers(void);
 
+extern void module_update_tracepoints(void);
+extern int module_get_iter_tracepoints(struct tracepoint_iter *iter);
+
 #else /* !CONFIG_MODULES... */
 #define EXPORT_SYMBOL(sym)
 #define EXPORT_SYMBOL_GPL(sym)
@@ -554,6 +562,15 @@ static inline void module_update_markers
 {
 }
 
+static inline void module_update_tracepoints(void)
+{
+}
+
+static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+	return 0;
+}
+
 #endif /* CONFIG_MODULES */
 
 struct device_driver;

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [RFC patch 02/12] LTTng tracepoint instrumentation fs
  2008-07-04 23:52 [RFC patch 00/12] Tracepoints v2 Mathieu Desnoyers
  2008-07-04 23:52 ` [RFC patch 01/12] Kernel Tracepoints Mathieu Desnoyers
@ 2008-07-04 23:52 ` Mathieu Desnoyers
  2008-07-04 23:52 ` [RFC patch 03/12] LTTng instrumentation ipc Mathieu Desnoyers
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-04 23:52 UTC (permalink / raw)
  To: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt
  Cc: Mathieu Desnoyers, Alexander Viro, Hideo AOKI, Takashi Nishiie,
	Masami Hiramatsu

[-- Attachment #1: lttng-instrumentation-fs.patch --]
[-- Type: text/plain, Size: 10531 bytes --]

Core filesystem tracepoints.

Tracepoints added :

fs_buffer_wait_end
fs_buffer_wait_start
fs_close
fs_exec
fs_ioctl
fs_llseek
fs_lseek
fs_open
fs_poll
fs_pread64
fs_pwrite64
fs_read
fs_readv
fs_select
fs_write
fs_writev

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Alexander Viro <viro@zeniv.linux.org.uk>
CC: 'Peter Zijlstra' <peterz@infradead.org>
CC: "Frank Ch. Eigler" <fche@redhat.com>
CC: 'Ingo Molnar' <mingo@elte.hu>
CC: 'Hideo AOKI' <haoki@redhat.com>
CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
CC: 'Steven Rostedt' <rostedt@goodmis.org>
CC: Masami Hiramatsu <mhiramat@redhat.com>
---
 fs/buffer.c     |    3 ++
 fs/compat.c     |    2 +
 fs/exec.c       |    2 +
 fs/fs-trace.h   |   65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ioctl.c      |    3 ++
 fs/open.c       |    3 ++
 fs/read_write.c |   19 ++++++++++++++--
 fs/select.c     |    3 ++
 8 files changed, 98 insertions(+), 2 deletions(-)

Index: linux-2.6-lttng/fs/buffer.c
===================================================================
--- linux-2.6-lttng.orig/fs/buffer.c	2008-07-04 18:49:23.000000000 -0400
+++ linux-2.6-lttng/fs/buffer.c	2008-07-04 18:49:33.000000000 -0400
@@ -41,6 +41,7 @@
 #include <linux/bitops.h>
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
+#include "fs-trace.h"
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 
@@ -89,7 +90,9 @@ void unlock_buffer(struct buffer_head *b
  */
 void __wait_on_buffer(struct buffer_head * bh)
 {
+	trace_fs_buffer_wait_start(bh);
 	wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
+	trace_fs_buffer_wait_end(bh);
 }
 
 static void
Index: linux-2.6-lttng/fs/compat.c
===================================================================
--- linux-2.6-lttng.orig/fs/compat.c	2008-07-04 18:49:23.000000000 -0400
+++ linux-2.6-lttng/fs/compat.c	2008-07-04 18:49:33.000000000 -0400
@@ -51,6 +51,7 @@
 #include <linux/poll.h>
 #include <linux/mm.h>
 #include <linux/eventpoll.h>
+#include "fs-trace.h"
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1402,6 +1403,7 @@ int compat_do_execve(char * filename,
 
 	retval = search_binary_handler(bprm, regs);
 	if (retval >= 0) {
+		trace_fs_exec(filename);
 		/* execve success */
 		security_bprm_free(bprm);
 		acct_update_integrals(current);
Index: linux-2.6-lttng/fs/ioctl.c
===================================================================
--- linux-2.6-lttng.orig/fs/ioctl.c	2008-07-04 18:49:23.000000000 -0400
+++ linux-2.6-lttng/fs/ioctl.c	2008-07-04 18:49:33.000000000 -0400
@@ -13,6 +13,7 @@
 #include <linux/security.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
+#include "fs-trace.h"
 
 #include <asm/ioctls.h>
 
@@ -201,6 +202,8 @@ asmlinkage long sys_ioctl(unsigned int f
 	if (!filp)
 		goto out;
 
+	trace_fs_ioctl(fd, cmd, arg);
+
 	error = security_file_ioctl(filp, cmd, arg);
 	if (error)
 		goto out_fput;
Index: linux-2.6-lttng/fs/open.c
===================================================================
--- linux-2.6-lttng.orig/fs/open.c	2008-07-04 18:49:23.000000000 -0400
+++ linux-2.6-lttng/fs/open.c	2008-07-04 18:49:33.000000000 -0400
@@ -28,6 +28,7 @@
 #include <linux/rcupdate.h>
 #include <linux/audit.h>
 #include <linux/falloc.h>
+#include "fs-trace.h"
 
 int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
@@ -1090,6 +1091,7 @@ long do_sys_open(int dfd, const char __u
 				fsnotify_open(f->f_path.dentry);
 				fd_install(fd, f);
 			}
+			trace_fs_open(fd, tmp);
 		}
 		putname(tmp);
 	}
@@ -1179,6 +1181,7 @@ asmlinkage long sys_close(unsigned int f
 	filp = fdt->fd[fd];
 	if (!filp)
 		goto out_unlock;
+	trace_fs_close(fd);
 	rcu_assign_pointer(fdt->fd[fd], NULL);
 	FD_CLR(fd, fdt->close_on_exec);
 	__put_unused_fd(files, fd);
Index: linux-2.6-lttng/fs/read_write.c
===================================================================
--- linux-2.6-lttng.orig/fs/read_write.c	2008-07-04 18:49:23.000000000 -0400
+++ linux-2.6-lttng/fs/read_write.c	2008-07-04 18:57:11.000000000 -0400
@@ -16,6 +16,7 @@
 #include <linux/syscalls.h>
 #include <linux/pagemap.h>
 #include <linux/splice.h>
+#include "fs-trace.h"
 #include "read_write.h"
 
 #include <asm/uaccess.h>
@@ -146,6 +147,9 @@ asmlinkage off_t sys_lseek(unsigned int 
 		if (res != (loff_t)retval)
 			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
 	}
+
+	trace_fs_lseek(fd, offset, origin);
+
 	fput_light(file, fput_needed);
 bad:
 	return retval;
@@ -173,6 +177,8 @@ asmlinkage long sys_llseek(unsigned int 
 	offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
 			origin);
 
+	trace_fs_llseek(fd, offset, origin);
+
 	retval = (int)offset;
 	if (offset >= 0) {
 		retval = -EFAULT;
@@ -360,6 +366,7 @@ asmlinkage ssize_t sys_read(unsigned int
 	if (file) {
 		loff_t pos = file_pos_read(file);
 		ret = vfs_read(file, buf, count, &pos);
+		trace_fs_read(fd, buf, count, ret);
 		file_pos_write(file, pos);
 		fput_light(file, fput_needed);
 	}
@@ -377,6 +384,7 @@ asmlinkage ssize_t sys_write(unsigned in
 	if (file) {
 		loff_t pos = file_pos_read(file);
 		ret = vfs_write(file, buf, count, &pos);
+		trace_fs_write(fd, buf, count, ret);
 		file_pos_write(file, pos);
 		fput_light(file, fput_needed);
 	}
@@ -397,8 +405,11 @@ asmlinkage ssize_t sys_pread64(unsigned 
 	file = fget_light(fd, &fput_needed);
 	if (file) {
 		ret = -ESPIPE;
-		if (file->f_mode & FMODE_PREAD)
+		if (file->f_mode & FMODE_PREAD) {
 			ret = vfs_read(file, buf, count, &pos);
+			trace_fs_pread64(fd, buf, count, pos, ret);
+		}
+
 		fput_light(file, fput_needed);
 	}
 
@@ -418,8 +429,10 @@ asmlinkage ssize_t sys_pwrite64(unsigned
 	file = fget_light(fd, &fput_needed);
 	if (file) {
 		ret = -ESPIPE;
-		if (file->f_mode & FMODE_PWRITE)  
+		if (file->f_mode & FMODE_PWRITE) {
 			ret = vfs_write(file, buf, count, &pos);
+			trace_fs_pwrite64(fd, buf, count, pos, ret);
+		}
 		fput_light(file, fput_needed);
 	}
 
@@ -664,6 +677,7 @@ sys_readv(unsigned long fd, const struct
 	if (file) {
 		loff_t pos = file_pos_read(file);
 		ret = vfs_readv(file, vec, vlen, &pos);
+		trace_fs_readv(fd, vec, vlen, ret);
 		file_pos_write(file, pos);
 		fput_light(file, fput_needed);
 	}
@@ -685,6 +699,7 @@ sys_writev(unsigned long fd, const struc
 	if (file) {
 		loff_t pos = file_pos_read(file);
 		ret = vfs_writev(file, vec, vlen, &pos);
+		trace_fs_writev(fd, vec, vlen, ret);
 		file_pos_write(file, pos);
 		fput_light(file, fput_needed);
 	}
Index: linux-2.6-lttng/fs/select.c
===================================================================
--- linux-2.6-lttng.orig/fs/select.c	2008-07-04 18:49:23.000000000 -0400
+++ linux-2.6-lttng/fs/select.c	2008-07-04 18:49:33.000000000 -0400
@@ -24,6 +24,7 @@
 #include <linux/fdtable.h>
 #include <linux/fs.h>
 #include <linux/rcupdate.h>
+#include "fs-trace.h"
 
 #include <asm/uaccess.h>
 
@@ -232,6 +233,7 @@ int do_select(int n, fd_set_bits *fds, s
 				file = fget_light(i, &fput_needed);
 				if (file) {
 					f_op = file->f_op;
+					trace_fs_select(i, *timeout);
 					mask = DEFAULT_POLLMASK;
 					if (f_op && f_op->poll)
 						mask = (*f_op->poll)(file, retval ? NULL : wait);
@@ -560,6 +562,7 @@ static inline unsigned int do_pollfd(str
 		file = fget_light(fd, &fput_needed);
 		mask = POLLNVAL;
 		if (file != NULL) {
+			trace_fs_poll(fd);
 			mask = DEFAULT_POLLMASK;
 			if (file->f_op && file->f_op->poll)
 				mask = file->f_op->poll(file, pwait);
Index: linux-2.6-lttng/fs/exec.c
===================================================================
--- linux-2.6-lttng.orig/fs/exec.c	2008-07-04 18:49:23.000000000 -0400
+++ linux-2.6-lttng/fs/exec.c	2008-07-04 18:49:33.000000000 -0400
@@ -51,6 +51,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
+#include "fs-trace.h"
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1330,6 +1331,7 @@ int do_execve(char * filename,
 
 	retval = search_binary_handler(bprm,regs);
 	if (retval >= 0) {
+		trace_fs_exec(filename);
 		/* execve success */
 		security_bprm_free(bprm);
 		acct_update_integrals(current);
Index: linux-2.6-lttng/fs/fs-trace.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/fs/fs-trace.h	2008-07-04 18:56:19.000000000 -0400
@@ -0,0 +1,65 @@
+#ifndef _FS_TRACE_H
+#define _FS_TRACE_H
+
+#include <linux/buffer_head.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(fs_buffer_wait_start,
+	TPPROTO(struct buffer_head *bh),
+	TPARGS(bh));
+DEFINE_TRACE(fs_buffer_wait_end,
+	TPPROTO(struct buffer_head *bh),
+	TPARGS(bh));
+DEFINE_TRACE(fs_exec,
+	TPPROTO(char *filename),
+	TPARGS(filename));
+DEFINE_TRACE(fs_ioctl,
+	TPPROTO(unsigned int fd, unsigned int cmd, unsigned long arg),
+	TPARGS(fd, cmd, arg));
+DEFINE_TRACE(fs_open,
+	TPPROTO(int fd, char *filename),
+	TPARGS(fd, filename));
+DEFINE_TRACE(fs_close,
+	TPPROTO(unsigned int fd),
+	TPARGS(fd));
+DEFINE_TRACE(fs_lseek,
+	TPPROTO(unsigned int fd, long offset, unsigned int origin),
+	TPARGS(fd, offset, origin));
+DEFINE_TRACE(fs_llseek,
+	TPPROTO(unsigned int fd, loff_t offset, unsigned int origin),
+	TPARGS(fd, offset, origin));
+
+/*
+ * Probes must be aware that __user * may be modified by concurrent userspace
+ * or kernel threads.
+ */
+DEFINE_TRACE(fs_read,
+	TPPROTO(unsigned int fd, char __user *buf, size_t count, ssize_t ret),
+	TPARGS(fd, buf, count, ret));
+DEFINE_TRACE(fs_write,
+	TPPROTO(unsigned int fd, const char __user *buf, size_t count,
+		ssize_t ret),
+	TPARGS(fd, buf, count, ret));
+DEFINE_TRACE(fs_pread64,
+	TPPROTO(unsigned int fd, char __user *buf, size_t count, loff_t pos,
+		ssize_t ret),
+	TPARGS(fd, buf, count, pos, ret));
+DEFINE_TRACE(fs_pwrite64,
+	TPPROTO(unsigned int fd, const char __user *buf, size_t count,
+		loff_t pos, ssize_t ret),
+	TPARGS(fd, buf, count, pos, ret));
+DEFINE_TRACE(fs_readv,
+	TPPROTO(unsigned long fd, const struct iovec __user *vec,
+		unsigned long vlen, ssize_t ret),
+	TPARGS(fd, vec, vlen, ret));
+DEFINE_TRACE(fs_writev,
+	TPPROTO(unsigned long fd, const struct iovec __user *vec,
+		unsigned long vlen, ssize_t ret),
+	TPARGS(fd, vec, vlen, ret));
+DEFINE_TRACE(fs_select,
+	TPPROTO(int fd, s64 timeout),
+	TPARGS(fd, timeout));
+DEFINE_TRACE(fs_poll,
+	TPPROTO(int fd),
+	TPARGS(fd));
+#endif

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [RFC patch 03/12] LTTng instrumentation ipc
  2008-07-04 23:52 [RFC patch 00/12] Tracepoints v2 Mathieu Desnoyers
  2008-07-04 23:52 ` [RFC patch 01/12] Kernel Tracepoints Mathieu Desnoyers
  2008-07-04 23:52 ` [RFC patch 02/12] LTTng tracepoint instrumentation fs Mathieu Desnoyers
@ 2008-07-04 23:52 ` Mathieu Desnoyers
  2008-07-04 23:52 ` [RFC patch 04/12] LTTng instrumentation kernel Mathieu Desnoyers
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-04 23:52 UTC (permalink / raw)
  To: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt
  Cc: Mathieu Desnoyers, Hideo AOKI, Takashi Nishiie

[-- Attachment #1: lttng-instrumentation-ipc.patch --]
[-- Type: text/plain, Size: 4241 bytes --]

Interprocess communication, core events.

Added tracepoints :

ipc_msg_create
ipc_sem_create
ipc_shm_create

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: 'Peter Zijlstra' <peterz@infradead.org>
CC: "Frank Ch. Eigler" <fche@redhat.com>
CC: 'Ingo Molnar' <mingo@elte.hu>
CC: 'Hideo AOKI' <haoki@redhat.com>
CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
CC: 'Steven Rostedt' <rostedt@goodmis.org>
---
 ipc/ipc-trace.h |   15 +++++++++++++++
 ipc/msg.c       |    6 +++++-
 ipc/sem.c       |    6 +++++-
 ipc/shm.c       |    6 +++++-
 4 files changed, 30 insertions(+), 3 deletions(-)

Index: linux-2.6-lttng/ipc/msg.c
===================================================================
--- linux-2.6-lttng.orig/ipc/msg.c	2008-07-03 12:45:36.000000000 -0400
+++ linux-2.6-lttng/ipc/msg.c	2008-07-03 12:47:14.000000000 -0400
@@ -38,6 +38,7 @@
 #include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
+#include "ipc-trace.h"
 
 #include <asm/current.h>
 #include <asm/uaccess.h>
@@ -314,6 +315,7 @@ asmlinkage long sys_msgget(key_t key, in
 	struct ipc_namespace *ns;
 	struct ipc_ops msg_ops;
 	struct ipc_params msg_params;
+	long ret;
 
 	ns = current->nsproxy->ipc_ns;
 
@@ -324,7 +326,9 @@ asmlinkage long sys_msgget(key_t key, in
 	msg_params.key = key;
 	msg_params.flg = msgflg;
 
-	return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
+	ret = ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
+	trace_ipc_msg_create(ret, msgflg);
+	return ret;
 }
 
 static inline unsigned long
Index: linux-2.6-lttng/ipc/sem.c
===================================================================
--- linux-2.6-lttng.orig/ipc/sem.c	2008-07-03 12:45:36.000000000 -0400
+++ linux-2.6-lttng/ipc/sem.c	2008-07-03 12:47:14.000000000 -0400
@@ -83,6 +83,7 @@
 #include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
+#include "ipc-trace.h"
 
 #include <asm/uaccess.h>
 #include "util.h"
@@ -314,6 +315,7 @@ asmlinkage long sys_semget(key_t key, in
 	struct ipc_namespace *ns;
 	struct ipc_ops sem_ops;
 	struct ipc_params sem_params;
+	long err;
 
 	ns = current->nsproxy->ipc_ns;
 
@@ -328,7 +330,9 @@ asmlinkage long sys_semget(key_t key, in
 	sem_params.flg = semflg;
 	sem_params.u.nsems = nsems;
 
-	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
+	err = ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
+	trace_ipc_sem_create(err, semflg);
+	return err;
 }
 
 /* Manage the doubly linked list sma->sem_pending as a FIFO:
Index: linux-2.6-lttng/ipc/shm.c
===================================================================
--- linux-2.6-lttng.orig/ipc/shm.c	2008-07-03 12:45:36.000000000 -0400
+++ linux-2.6-lttng/ipc/shm.c	2008-07-03 12:47:14.000000000 -0400
@@ -39,6 +39,7 @@
 #include <linux/nsproxy.h>
 #include <linux/mount.h>
 #include <linux/ipc_namespace.h>
+#include "ipc-trace.h"
 
 #include <asm/uaccess.h>
 
@@ -460,6 +461,7 @@ asmlinkage long sys_shmget (key_t key, s
 	struct ipc_namespace *ns;
 	struct ipc_ops shm_ops;
 	struct ipc_params shm_params;
+	long err;
 
 	ns = current->nsproxy->ipc_ns;
 
@@ -471,7 +473,9 @@ asmlinkage long sys_shmget (key_t key, s
 	shm_params.flg = shmflg;
 	shm_params.u.size = size;
 
-	return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
+	err = ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
+	trace_ipc_shm_create(err, shmflg);
+	return err;
 }
 
 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
Index: linux-2.6-lttng/ipc/ipc-trace.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/ipc/ipc-trace.h	2008-07-03 12:47:14.000000000 -0400
@@ -0,0 +1,15 @@
+#ifndef _IPC_TRACE_H
+#define _IPC_TRACE_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(ipc_msg_create,
+	TPPROTO(long id, int flags),
+	TPARGS(id, flags));
+DEFINE_TRACE(ipc_sem_create,
+	TPPROTO(long id, int flags),
+	TPARGS(id, flags));
+DEFINE_TRACE(ipc_shm_create,
+	TPPROTO(long id, int flags),
+	TPARGS(id, flags));
+#endif

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [RFC patch 04/12] LTTng instrumentation kernel
  2008-07-04 23:52 [RFC patch 00/12] Tracepoints v2 Mathieu Desnoyers
                   ` (2 preceding siblings ...)
  2008-07-04 23:52 ` [RFC patch 03/12] LTTng instrumentation ipc Mathieu Desnoyers
@ 2008-07-04 23:52 ` Mathieu Desnoyers
  2008-07-07 16:36   ` Masami Hiramatsu
  2008-07-04 23:52 ` [RFC patch 05/12] LTTng instrumentation mm Mathieu Desnoyers
                   ` (8 subsequent siblings)
  12 siblings, 1 reply; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-04 23:52 UTC (permalink / raw)
  To: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt
  Cc: Mathieu Desnoyers, Hideo AOKI, Takashi Nishiie, Masami Hiramatsu

[-- Attachment #1: lttng-instrumentation-kernel.patch --]
[-- Type: text/plain, Size: 16577 bytes --]

Core kernel events.

*not* present in this patch because they are architecture specific :
- syscall entry/exit
- traps
- kernel thread creation

Added tracepoints :

kernel_irq_entry
kernel_irq_exit
kernel_kthread_stop
kernel_kthread_stop_ret
kernel_module_free
kernel_module_load
kernel_printk
kernel_process_exit
kernel_process_fork
kernel_process_free
kernel_process_wait
kernel_sched_migrate_task
kernel_sched_schedule
kernel_sched_try_wakeup
kernel_sched_wait_task
kernel_sched_wakeup_new_task
kernel_signal_send
kernel_softirq_entry
kernel_softirq_exit
kernel_softirq_raise
kernel_tasklet_high_entry
kernel_tasklet_high_exit
kernel_tasklet_low_entry
kernel_tasklet_low_exit
kernel_timer_itimer_expired
kernel_timer_itimer_set
kernel_timer_set
kernel_timer_timeout
kernel_timer_update_time
kernel_vprintk

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: 'Peter Zijlstra' <peterz@infradead.org>
CC: "Frank Ch. Eigler" <fche@redhat.com>
CC: 'Ingo Molnar' <mingo@elte.hu>
CC: 'Hideo AOKI' <haoki@redhat.com>
CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
CC: 'Steven Rostedt' <rostedt@goodmis.org>
CC: Masami Hiramatsu <mhiramat@redhat.com>
---
 kernel/exit.c         |    6 ++
 kernel/fork.c         |    3 +
 kernel/irq/handle.c   |    6 ++
 kernel/itimer.c       |    5 ++
 kernel/kernel-trace.h |  106 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/kthread.c      |    5 ++
 kernel/module.c       |    5 ++
 kernel/printk.c       |    5 ++
 kernel/sched.c        |    6 ++
 kernel/signal.c       |    3 +
 kernel/softirq.c      |    8 +++
 kernel/timer.c        |    8 +++
 12 files changed, 165 insertions(+), 1 deletion(-)

Index: linux-2.6-lttng/kernel/irq/handle.c
===================================================================
--- linux-2.6-lttng.orig/kernel/irq/handle.c	2008-07-04 16:59:29.000000000 -0400
+++ linux-2.6-lttng/kernel/irq/handle.c	2008-07-04 17:38:16.000000000 -0400
@@ -15,6 +15,7 @@
 #include <linux/random.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include "../kernel-trace.h"
 
 #include "internals.h"
 
@@ -130,6 +131,9 @@ irqreturn_t handle_IRQ_event(unsigned in
 {
 	irqreturn_t ret, retval = IRQ_NONE;
 	unsigned int status = 0;
+	struct pt_regs *regs = get_irq_regs();
+
+	trace_kernel_irq_entry(irq, regs);
 
 	handle_dynamic_tick(action);
 
@@ -148,6 +152,8 @@ irqreturn_t handle_IRQ_event(unsigned in
 		add_interrupt_randomness(irq);
 	local_irq_disable();
 
+	trace_kernel_irq_exit();
+
 	return retval;
 }
 
Index: linux-2.6-lttng/kernel/itimer.c
===================================================================
--- linux-2.6-lttng.orig/kernel/itimer.c	2008-07-04 16:59:29.000000000 -0400
+++ linux-2.6-lttng/kernel/itimer.c	2008-07-04 17:38:16.000000000 -0400
@@ -12,6 +12,7 @@
 #include <linux/time.h>
 #include <linux/posix-timers.h>
 #include <linux/hrtimer.h>
+#include "kernel-trace.h"
 
 #include <asm/uaccess.h>
 
@@ -132,6 +133,8 @@ enum hrtimer_restart it_real_fn(struct h
 	struct signal_struct *sig =
 		container_of(timer, struct signal_struct, real_timer);
 
+	trace_kernel_timer_itimer_expired(sig);
+
 	kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
 
 	return HRTIMER_NORESTART;
@@ -157,6 +160,8 @@ int do_setitimer(int which, struct itime
 	    !timeval_valid(&value->it_interval))
 		return -EINVAL;
 
+	trace_kernel_timer_itimer_set(which, value);
+
 	switch (which) {
 	case ITIMER_REAL:
 again:
Index: linux-2.6-lttng/kernel/kthread.c
===================================================================
--- linux-2.6-lttng.orig/kernel/kthread.c	2008-07-04 16:59:29.000000000 -0400
+++ linux-2.6-lttng/kernel/kthread.c	2008-07-04 17:38:16.000000000 -0400
@@ -13,6 +13,7 @@
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include "kernel-trace.h"
 
 #define KTHREAD_NICE_LEVEL (-5)
 
@@ -205,6 +206,8 @@ int kthread_stop(struct task_struct *k)
 	/* It could exit after stop_info.k set, but before wake_up_process. */
 	get_task_struct(k);
 
+	trace_kernel_kthread_stop(k);
+
 	/* Must init completion *before* thread sees kthread_stop_info.k */
 	init_completion(&kthread_stop_info.done);
 	smp_wmb();
@@ -220,6 +223,8 @@ int kthread_stop(struct task_struct *k)
 	ret = kthread_stop_info.err;
 	mutex_unlock(&kthread_stop_lock);
 
+	trace_kernel_kthread_stop_ret(ret);
+
 	return ret;
 }
 EXPORT_SYMBOL(kthread_stop);
Index: linux-2.6-lttng/kernel/printk.c
===================================================================
--- linux-2.6-lttng.orig/kernel/printk.c	2008-07-04 16:59:29.000000000 -0400
+++ linux-2.6-lttng/kernel/printk.c	2008-07-04 17:38:16.000000000 -0400
@@ -32,6 +32,7 @@
 #include <linux/security.h>
 #include <linux/bootmem.h>
 #include <linux/syscalls.h>
+#include "kernel-trace.h"
 
 #include <asm/uaccess.h>
 
@@ -610,6 +611,7 @@ asmlinkage int printk(const char *fmt, .
 	int r;
 
 	va_start(args, fmt);
+	trace_kernel_printk(__builtin_return_address(0));
 	r = vprintk(fmt, args);
 	va_end(args);
 
@@ -687,6 +689,9 @@ asmlinkage int vprintk(const char *fmt, 
 	raw_local_irq_save(flags);
 	this_cpu = smp_processor_id();
 
+	trace_kernel_vprintk(__builtin_return_address(0),
+		printk_buf, printed_len);
+
 	/*
 	 * Ouch, printk recursed into itself!
 	 */
Index: linux-2.6-lttng/kernel/sched.c
===================================================================
--- linux-2.6-lttng.orig/kernel/sched.c	2008-07-04 16:59:29.000000000 -0400
+++ linux-2.6-lttng/kernel/sched.c	2008-07-04 17:38:16.000000000 -0400
@@ -70,6 +70,7 @@
 #include <linux/bootmem.h>
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
+#include "kernel-trace.h"
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -1806,6 +1807,7 @@ void wait_task_inactive(struct task_stru
 		 * just go back and repeat.
 		 */
 		rq = task_rq_lock(p, &flags);
+		trace_kernel_sched_wait_task(p);
 		running = task_running(rq, p);
 		on_rq = p->se.on_rq;
 		task_rq_unlock(rq, &flags);
@@ -2087,6 +2089,7 @@ static int try_to_wake_up(struct task_st
 
 	smp_wmb();
 	rq = task_rq_lock(p, &flags);
+	trace_kernel_sched_try_wakeup(p);
 	old_state = p->state;
 	if (!(old_state & state))
 		goto out;
@@ -2264,6 +2267,7 @@ void wake_up_new_task(struct task_struct
 	struct rq *rq;
 
 	rq = task_rq_lock(p, &flags);
+	trace_kernel_sched_wakeup_new_task(p);
 	BUG_ON(p->state != TASK_RUNNING);
 	update_rq_clock(rq);
 
@@ -2451,6 +2455,7 @@ context_switch(struct rq *rq, struct tas
 	struct mm_struct *mm, *oldmm;
 
 	prepare_task_switch(rq, prev, next);
+	trace_kernel_sched_schedule(prev, next);
 	mm = next->mm;
 	oldmm = prev->active_mm;
 	/*
@@ -2683,6 +2688,7 @@ static void sched_migrate_task(struct ta
 	    || unlikely(cpu_is_offline(dest_cpu)))
 		goto out;
 
+	trace_kernel_sched_migrate_task(p, dest_cpu);
 	/* force the process onto the specified CPU */
 	if (migrate_task(p, dest_cpu, &req)) {
 		/* Need to wait for migration thread (might exit: take ref). */
Index: linux-2.6-lttng/kernel/signal.c
===================================================================
--- linux-2.6-lttng.orig/kernel/signal.c	2008-07-04 16:59:29.000000000 -0400
+++ linux-2.6-lttng/kernel/signal.c	2008-07-04 17:38:16.000000000 -0400
@@ -26,6 +26,7 @@
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
+#include "kernel-trace.h"
 
 #include <asm/param.h>
 #include <asm/uaccess.h>
@@ -807,6 +808,8 @@ static int send_signal(int sig, struct s
 	struct sigpending *pending;
 	struct sigqueue *q;
 
+	trace_kernel_signal_send(sig, t);
+
 	assert_spin_locked(&t->sighand->siglock);
 	if (!prepare_signal(sig, t))
 		return 0;
Index: linux-2.6-lttng/kernel/softirq.c
===================================================================
--- linux-2.6-lttng.orig/kernel/softirq.c	2008-07-04 16:59:29.000000000 -0400
+++ linux-2.6-lttng/kernel/softirq.c	2008-07-04 17:38:16.000000000 -0400
@@ -21,6 +21,7 @@
 #include <linux/rcupdate.h>
 #include <linux/smp.h>
 #include <linux/tick.h>
+#include "kernel-trace.h"
 
 #include <asm/irq.h>
 /*
@@ -231,7 +232,9 @@ restart:
 
 	do {
 		if (pending & 1) {
+			trace_kernel_softirq_entry(h, softirq_vec);
 			h->action(h);
+			trace_kernel_softirq_exit(h, softirq_vec);
 			rcu_bh_qsctr_inc(cpu);
 		}
 		h++;
@@ -323,6 +326,7 @@ void irq_exit(void)
  */
 inline void raise_softirq_irqoff(unsigned int nr)
 {
+	trace_kernel_softirq_raise(nr);
 	__raise_softirq_irqoff(nr);
 
 	/*
@@ -412,7 +416,9 @@ static void tasklet_action(struct softir
 			if (!atomic_read(&t->count)) {
 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
 					BUG();
+				trace_kernel_tasklet_low_entry(t);
 				t->func(t->data);
+				trace_kernel_tasklet_low_exit(t);
 				tasklet_unlock(t);
 				continue;
 			}
@@ -447,7 +453,9 @@ static void tasklet_hi_action(struct sof
 			if (!atomic_read(&t->count)) {
 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
 					BUG();
+				trace_kernel_tasklet_high_entry(t);
 				t->func(t->data);
+				trace_kernel_tasklet_high_exit(t);
 				tasklet_unlock(t);
 				continue;
 			}
Index: linux-2.6-lttng/kernel/timer.c
===================================================================
--- linux-2.6-lttng.orig/kernel/timer.c	2008-07-04 16:59:29.000000000 -0400
+++ linux-2.6-lttng/kernel/timer.c	2008-07-04 17:38:16.000000000 -0400
@@ -37,12 +37,14 @@
 #include <linux/delay.h>
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
+#include "kernel-trace.h"
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/div64.h>
 #include <asm/timex.h>
 #include <asm/io.h>
+#include <asm/irq_regs.h>
 
 u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
 
@@ -288,6 +290,7 @@ static void internal_add_timer(struct tv
 		i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
 		vec = base->tv5.vec + i;
 	}
+	trace_kernel_timer_set(timer);
 	/*
 	 * Timers are FIFO:
 	 */
@@ -1074,6 +1077,7 @@ void do_timer(unsigned long ticks)
 {
 	jiffies_64 += ticks;
 	update_times(ticks);
+	trace_kernel_timer_update_time();
 }
 
 #ifdef __ARCH_WANT_SYS_ALARM
@@ -1155,7 +1159,9 @@ asmlinkage long sys_getegid(void)
 
 static void process_timeout(unsigned long __data)
 {
-	wake_up_process((struct task_struct *)__data);
+	struct task_struct *task = (struct task_struct *)__data;
+	trace_kernel_timer_timeout(task);
+	wake_up_process(task);
 }
 
 /**
Index: linux-2.6-lttng/kernel/exit.c
===================================================================
--- linux-2.6-lttng.orig/kernel/exit.c	2008-07-04 16:59:29.000000000 -0400
+++ linux-2.6-lttng/kernel/exit.c	2008-07-04 17:38:16.000000000 -0400
@@ -45,6 +45,7 @@
 #include <linux/resource.h>
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
+#include "kernel-trace.h"
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -148,6 +149,7 @@ static void __exit_signal(struct task_st
 
 static void delayed_put_task_struct(struct rcu_head *rhp)
 {
+	trace_kernel_process_free(container_of(rhp, struct task_struct, rcu));
 	put_task_struct(container_of(rhp, struct task_struct, rcu));
 }
 
@@ -1042,6 +1044,8 @@ NORET_TYPE void do_exit(long code)
 
 	if (group_dead)
 		acct_process();
+	trace_kernel_process_exit(tsk);
+
 	exit_sem(tsk);
 	exit_files(tsk);
 	exit_fs(tsk);
@@ -1526,6 +1530,8 @@ static long do_wait(enum pid_type type, 
 	struct task_struct *tsk;
 	int flag, retval;
 
+	trace_kernel_process_wait(pid);
+
 	add_wait_queue(&current->signal->wait_chldexit,&wait);
 repeat:
 	/* If there is nothing that can match our critier just get out */
Index: linux-2.6-lttng/kernel/fork.c
===================================================================
--- linux-2.6-lttng.orig/kernel/fork.c	2008-07-04 16:59:29.000000000 -0400
+++ linux-2.6-lttng/kernel/fork.c	2008-07-04 17:38:16.000000000 -0400
@@ -54,6 +54,7 @@
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
+#include "kernel-trace.h"
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1356,6 +1357,8 @@ long do_fork(unsigned long clone_flags,
 	if (!IS_ERR(p)) {
 		struct completion vfork;
 
+		trace_kernel_process_fork(current, p);
+
 		nr = task_pid_vnr(p);
 
 		if (clone_flags & CLONE_PARENT_SETTID)
Index: linux-2.6-lttng/kernel/module.c
===================================================================
--- linux-2.6-lttng.orig/kernel/module.c	2008-07-04 16:59:29.000000000 -0400
+++ linux-2.6-lttng/kernel/module.c	2008-07-04 17:38:16.000000000 -0400
@@ -47,6 +47,7 @@
 #include <linux/license.h>
 #include <asm/sections.h>
 #include <linux/tracepoint.h>
+#include "kernel-trace.h"
 
 #if 0
 #define DEBUGP printk
@@ -1386,6 +1387,8 @@ static int __unlink_module(void *_mod)
 /* Free a module, remove from lists, etc (must hold module_mutex). */
 static void free_module(struct module *mod)
 {
+	trace_kernel_module_free(mod);
+
 	/* Delete from various lists */
 	stop_machine_run(__unlink_module, mod, NR_CPUS);
 	remove_notes_attrs(mod);
@@ -2176,6 +2179,8 @@ static struct module *load_module(void _
 	/* Get rid of temporary copy */
 	vfree(hdr);
 
+	trace_kernel_module_load(mod);
+
 	/* Done! */
 	return mod;
 
Index: linux-2.6-lttng/kernel/kernel-trace.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/kernel/kernel-trace.h	2008-07-04 17:38:16.000000000 -0400
@@ -0,0 +1,106 @@
+#ifndef _KERNEL_TRACE_H
+#define _KERNEL_TRACE_H
+
+#include <linux/kdebug.h>
+#include <linux/interrupt.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(kernel_irq_entry,
+	TPPROTO(unsigned int id, struct pt_regs *regs),
+	TPARGS(id, regs));
+DEFINE_TRACE(kernel_irq_exit,
+	TPPROTO(void),
+	TPARGS());
+DEFINE_TRACE(kernel_timer_itimer_expired,
+	TPPROTO(struct signal_struct *sig),
+	TPARGS(sig));
+DEFINE_TRACE(kernel_timer_itimer_set,
+	TPPROTO(int which, struct itimerval *value),
+	TPARGS(which, value));
+DEFINE_TRACE(kernel_kthread_stop,
+	TPPROTO(struct task_struct *t),
+	TPARGS(t));
+DEFINE_TRACE(kernel_kthread_stop_ret,
+	TPPROTO(int ret),
+	TPARGS(ret));
+DEFINE_TRACE(kernel_printk,
+	TPPROTO(void *retaddr),
+	TPARGS(retaddr));
+DEFINE_TRACE(kernel_vprintk,
+	TPPROTO(void *retaddr, char *buf, int len),
+	TPARGS(retaddr, buf, len));
+
+/*
+ * Scheduler trace points.
+ */
+DEFINE_TRACE(kernel_sched_wait_task,
+	TPPROTO(struct task_struct *p),
+	TPARGS(p));
+DEFINE_TRACE(kernel_sched_try_wakeup,
+	TPPROTO(struct task_struct *p),
+	TPARGS(p));
+DEFINE_TRACE(kernel_sched_wakeup_new_task,
+	TPPROTO(struct task_struct *p),
+	TPARGS(p));
+DEFINE_TRACE(kernel_sched_schedule,
+	TPPROTO(struct task_struct *prev, struct task_struct *next),
+	TPARGS(prev, next));
+DEFINE_TRACE(kernel_sched_migrate_task,
+	TPPROTO(struct task_struct *p, int dest_cpu),
+	TPARGS(p, dest_cpu));
+
+DEFINE_TRACE(kernel_signal_send,
+	TPPROTO(int sig, struct task_struct *p),
+	TPARGS(sig, p));
+DEFINE_TRACE(kernel_softirq_entry,
+	TPPROTO(struct softirq_action *h, struct softirq_action *softirq_vec),
+	TPARGS(h, softirq_vec));
+DEFINE_TRACE(kernel_softirq_exit,
+	TPPROTO(struct softirq_action *h, struct softirq_action *softirq_vec),
+	TPARGS(h, softirq_vec));
+DEFINE_TRACE(kernel_softirq_raise,
+	TPPROTO(unsigned int nr),
+	TPARGS(nr));
+DEFINE_TRACE(kernel_tasklet_low_entry,
+	TPPROTO(struct tasklet_struct *t),
+	TPARGS(t));
+DEFINE_TRACE(kernel_tasklet_low_exit,
+	TPPROTO(struct tasklet_struct *t),
+	TPARGS(t));
+DEFINE_TRACE(kernel_tasklet_high_entry,
+	TPPROTO(struct tasklet_struct *t),
+	TPARGS(t));
+DEFINE_TRACE(kernel_tasklet_high_exit,
+	TPPROTO(struct tasklet_struct *t),
+	TPARGS(t));
+DEFINE_TRACE(kernel_timer_set,
+	TPPROTO(struct timer_list *timer),
+	TPARGS(timer));
+/*
+ * xtime_lock is taken when kernel_timer_update_time tracepoint is reached.
+ */
+DEFINE_TRACE(kernel_timer_update_time,
+	TPPROTO(void),
+	TPARGS());
+DEFINE_TRACE(kernel_timer_timeout,
+	TPPROTO(struct task_struct *p),
+	TPARGS(p));
+DEFINE_TRACE(kernel_process_free,
+	TPPROTO(struct task_struct *p),
+	TPARGS(p));
+DEFINE_TRACE(kernel_process_exit,
+	TPPROTO(struct task_struct *p),
+	TPARGS(p));
+DEFINE_TRACE(kernel_process_wait,
+	TPPROTO(struct pid *pid),
+	TPARGS(pid));
+DEFINE_TRACE(kernel_process_fork,
+	TPPROTO(struct task_struct *parent, struct task_struct *child),
+	TPARGS(parent, child));
+DEFINE_TRACE(kernel_module_free,
+	TPPROTO(struct module *mod),
+	TPARGS(mod));
+DEFINE_TRACE(kernel_module_load,
+	TPPROTO(struct module *mod),
+	TPARGS(mod));
+#endif

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [RFC patch 05/12] LTTng instrumentation mm
  2008-07-04 23:52 [RFC patch 00/12] Tracepoints v2 Mathieu Desnoyers
                   ` (3 preceding siblings ...)
  2008-07-04 23:52 ` [RFC patch 04/12] LTTng instrumentation kernel Mathieu Desnoyers
@ 2008-07-04 23:52 ` Mathieu Desnoyers
  2008-07-05  9:42   ` KOSAKI Motohiro
  2008-07-04 23:52 ` [RFC patch 06/12] LTTng instrumentation net Mathieu Desnoyers
                   ` (7 subsequent siblings)
  12 siblings, 1 reply; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-04 23:52 UTC (permalink / raw)
  To: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt
  Cc: Mathieu Desnoyers, linux-mm, Dave Hansen, Hideo AOKI,
	Takashi Nishiie, Masami Hiramatsu

[-- Attachment #1: lttng-instrumentation-mm.patch --]
[-- Type: text/plain, Size: 9441 bytes --]

Memory management core events.

Added tracepoints :

mm_filemap_wait_end
mm_filemap_wait_start
mm_handle_fault_entry
mm_handle_fault_exit
mm_huge_page_alloc
mm_huge_page_free
mm_page_alloc
mm_page_free
mm_swap_file_close
mm_swap_file_open
mm_swap_in
mm_swap_out

Changelog:
- Use page_to_pfn for swap out instrumentation, wait_on_page_bit, do_swap_page,
  page alloc/free.
- add missing free_hot_cold_page instrumentation.
- add hugetlb page_alloc page_free instrumentation.
- Add write_access to mm fault.
- Add page bit_nr waited for by wait_on_page_bit.
- Move page alloc instrumentation to __alloc_pages so we cover the alloc zeroed
  page path.
- Add swap file used for swap in and swap out events.
- Dump the swap files, instrument swapon and swapoff.
- Move to tracepoints

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: linux-mm@kvack.org
CC: Dave Hansen <haveblue@us.ibm.com>
CC: 'Peter Zijlstra' <peterz@infradead.org>
CC: "Frank Ch. Eigler" <fche@redhat.com>
CC: 'Ingo Molnar' <mingo@elte.hu>
CC: 'Hideo AOKI' <haoki@redhat.com>
CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
CC: 'Steven Rostedt' <rostedt@goodmis.org>
CC: Masami Hiramatsu <mhiramat@redhat.com>
---
 mm/filemap.c    |    3 +++
 mm/hugetlb.c    |    3 +++
 mm/memory.c     |   34 +++++++++++++++++++++++++---------
 mm/mm-trace.h   |   46 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c |    6 ++++++
 mm/page_io.c    |    2 ++
 mm/swapfile.c   |    3 +++
 7 files changed, 88 insertions(+), 9 deletions(-)

Index: linux-2.6-lttng/mm/filemap.c
===================================================================
--- linux-2.6-lttng.orig/mm/filemap.c	2008-07-04 18:26:02.000000000 -0400
+++ linux-2.6-lttng/mm/filemap.c	2008-07-04 18:26:37.000000000 -0400
@@ -33,6 +33,7 @@
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
 #include <linux/memcontrol.h>
+#include "mm-trace.h"
 #include "internal.h"
 
 /*
@@ -540,9 +541,11 @@ void wait_on_page_bit(struct page *page,
 {
 	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
 
+	trace_mm_filemap_wait_start(page, bit_nr);
 	if (test_bit(bit_nr, &page->flags))
 		__wait_on_bit(page_waitqueue(page), &wait, sync_page,
 							TASK_UNINTERRUPTIBLE);
+	trace_mm_filemap_wait_end(page, bit_nr);
 }
 EXPORT_SYMBOL(wait_on_page_bit);
 
Index: linux-2.6-lttng/mm/memory.c
===================================================================
--- linux-2.6-lttng.orig/mm/memory.c	2008-07-04 18:26:02.000000000 -0400
+++ linux-2.6-lttng/mm/memory.c	2008-07-04 18:26:37.000000000 -0400
@@ -51,6 +51,7 @@
 #include <linux/init.h>
 #include <linux/writeback.h>
 #include <linux/memcontrol.h>
+#include "mm-trace.h"
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -2201,6 +2202,7 @@ static int do_swap_page(struct mm_struct
 		/* Had to read the page from swap area: Major fault */
 		ret = VM_FAULT_MAJOR;
 		count_vm_event(PGMAJFAULT);
+		trace_mm_swap_in(page, entry);
 	}
 
 	if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
@@ -2650,30 +2652,44 @@ unlock:
 int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, int write_access)
 {
+	int res;
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 
+	trace_mm_handle_fault_entry(address, write_access);
+
 	__set_current_state(TASK_RUNNING);
 
 	count_vm_event(PGFAULT);
 
-	if (unlikely(is_vm_hugetlb_page(vma)))
-		return hugetlb_fault(mm, vma, address, write_access);
+	if (unlikely(is_vm_hugetlb_page(vma))) {
+		res = hugetlb_fault(mm, vma, address, write_access);
+		goto end;
+	}
 
 	pgd = pgd_offset(mm, address);
 	pud = pud_alloc(mm, pgd, address);
-	if (!pud)
-		return VM_FAULT_OOM;
+	if (!pud) {
+		res = VM_FAULT_OOM;
+		goto end;
+	}
 	pmd = pmd_alloc(mm, pud, address);
-	if (!pmd)
-		return VM_FAULT_OOM;
+	if (!pmd) {
+		res = VM_FAULT_OOM;
+		goto end;
+	}
 	pte = pte_alloc_map(mm, pmd, address);
-	if (!pte)
-		return VM_FAULT_OOM;
+	if (!pte) {
+		res = VM_FAULT_OOM;
+		goto end;
+	}
 
-	return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
+	res = handle_pte_fault(mm, vma, address, pte, pmd, write_access);
+end:
+	trace_mm_handle_fault_exit();
+	return res;
 }
 
 #ifndef __PAGETABLE_PUD_FOLDED
Index: linux-2.6-lttng/mm/page_alloc.c
===================================================================
--- linux-2.6-lttng.orig/mm/page_alloc.c	2008-07-04 18:26:02.000000000 -0400
+++ linux-2.6-lttng/mm/page_alloc.c	2008-07-04 18:26:37.000000000 -0400
@@ -46,6 +46,7 @@
 #include <linux/page-isolation.h>
 #include <linux/memcontrol.h>
 #include <linux/debugobjects.h>
+#include "mm-trace.h"
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -510,6 +511,8 @@ static void __free_pages_ok(struct page 
 	int i;
 	int reserved = 0;
 
+	trace_mm_page_free(page, order);
+
 	for (i = 0 ; i < (1 << order) ; ++i)
 		reserved += free_pages_check(page + i);
 	if (reserved)
@@ -966,6 +969,8 @@ static void free_hot_cold_page(struct pa
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
 
+	trace_mm_page_free(page, 0);
+
 	if (PageAnon(page))
 		page->mapping = NULL;
 	if (free_pages_check(page))
@@ -1630,6 +1635,7 @@ nopage:
 		show_mem();
 	}
 got_pg:
+	trace_mm_page_alloc(page, order);
 	return page;
 }
 
Index: linux-2.6-lttng/mm/page_io.c
===================================================================
--- linux-2.6-lttng.orig/mm/page_io.c	2008-07-04 18:26:02.000000000 -0400
+++ linux-2.6-lttng/mm/page_io.c	2008-07-04 18:26:37.000000000 -0400
@@ -17,6 +17,7 @@
 #include <linux/bio.h>
 #include <linux/swapops.h>
 #include <linux/writeback.h>
+#include "mm-trace.h"
 #include <asm/pgtable.h>
 
 static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
@@ -114,6 +115,7 @@ int swap_writepage(struct page *page, st
 		rw |= (1 << BIO_RW_SYNC);
 	count_vm_event(PSWPOUT);
 	set_page_writeback(page);
+	trace_mm_swap_out(page);
 	unlock_page(page);
 	submit_bio(rw, bio);
 out:
Index: linux-2.6-lttng/mm/hugetlb.c
===================================================================
--- linux-2.6-lttng.orig/mm/hugetlb.c	2008-07-04 18:26:02.000000000 -0400
+++ linux-2.6-lttng/mm/hugetlb.c	2008-07-04 18:26:37.000000000 -0400
@@ -14,6 +14,7 @@
 #include <linux/mempolicy.h>
 #include <linux/cpuset.h>
 #include <linux/mutex.h>
+#include "mm-trace.h"
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -141,6 +142,7 @@ static void free_huge_page(struct page *
 	int nid = page_to_nid(page);
 	struct address_space *mapping;
 
+	trace_mm_huge_page_free(page);
 	mapping = (struct address_space *) page_private(page);
 	set_page_private(page, 0);
 	BUG_ON(page_count(page));
@@ -509,6 +511,7 @@ static struct page *alloc_huge_page(stru
 	if (!IS_ERR(page)) {
 		set_page_refcounted(page);
 		set_page_private(page, (unsigned long) mapping);
+		trace_mm_huge_page_alloc(page);
 	}
 	return page;
 }
Index: linux-2.6-lttng/mm/swapfile.c
===================================================================
--- linux-2.6-lttng.orig/mm/swapfile.c	2008-07-04 18:26:02.000000000 -0400
+++ linux-2.6-lttng/mm/swapfile.c	2008-07-04 18:26:37.000000000 -0400
@@ -32,6 +32,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <linux/swapops.h>
+#include "mm-trace.h"
 
 DEFINE_SPINLOCK(swap_lock);
 unsigned int nr_swapfiles;
@@ -1310,6 +1311,7 @@ asmlinkage long sys_swapoff(const char _
 	swap_map = p->swap_map;
 	p->swap_map = NULL;
 	p->flags = 0;
+	trace_mm_swap_file_close(swap_file);
 	spin_unlock(&swap_lock);
 	mutex_unlock(&swapon_mutex);
 	vfree(swap_map);
@@ -1695,6 +1697,7 @@ asmlinkage long sys_swapon(const char __
 	} else {
 		swap_info[prev].next = p - swap_info;
 	}
+	trace_mm_swap_file_open(swap_file, name);
 	spin_unlock(&swap_lock);
 	mutex_unlock(&swapon_mutex);
 	error = 0;
Index: linux-2.6-lttng/mm/mm-trace.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/mm/mm-trace.h	2008-07-04 18:26:37.000000000 -0400
@@ -0,0 +1,46 @@
+#ifndef _MM_TRACE_H
+#define _MM_TRACE_H
+
+#include <linux/swap.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(mm_filemap_wait_start,
+	TPPROTO(struct page *page, int bit_nr),
+	TPARGS(page, bit_nr));
+DEFINE_TRACE(mm_filemap_wait_end,
+	TPPROTO(struct page *page, int bit_nr),
+	TPARGS(page, bit_nr));
+DEFINE_TRACE(mm_swap_in,
+	TPPROTO(struct page *page, swp_entry_t entry),
+	TPARGS(page, entry));
+DEFINE_TRACE(mm_handle_fault_entry,
+	TPPROTO(unsigned long address, int write_access),
+	TPARGS(address, write_access));
+DEFINE_TRACE(mm_handle_fault_exit,
+	TPPROTO(void),
+	TPARGS());
+DEFINE_TRACE(mm_page_free,
+	TPPROTO(struct page *page, unsigned int order),
+	TPARGS(page, order));
+/*
+ * mm_page_alloc : page can be NULL.
+ */
+DEFINE_TRACE(mm_page_alloc,
+	TPPROTO(struct page *page, unsigned int order),
+	TPARGS(page, order));
+DEFINE_TRACE(mm_swap_out,
+	TPPROTO(struct page *page),
+	TPARGS(page));
+DEFINE_TRACE(mm_huge_page_free,
+	TPPROTO(struct page *page),
+	TPARGS(page));
+DEFINE_TRACE(mm_huge_page_alloc,
+	TPPROTO(struct page *page),
+	TPARGS(page));
+DEFINE_TRACE(mm_swap_file_close,
+	TPPROTO(struct file *file),
+	TPARGS(file));
+DEFINE_TRACE(mm_swap_file_open,
+	TPPROTO(struct file *file, char *filename),
+	TPARGS(file, filename));
+#endif

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [RFC patch 06/12] LTTng instrumentation net
  2008-07-04 23:52 [RFC patch 00/12] Tracepoints v2 Mathieu Desnoyers
                   ` (4 preceding siblings ...)
  2008-07-04 23:52 ` [RFC patch 05/12] LTTng instrumentation mm Mathieu Desnoyers
@ 2008-07-04 23:52 ` Mathieu Desnoyers
  2008-07-04 23:52 ` [RFC patch 07/12] Traceprobes Mathieu Desnoyers
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-04 23:52 UTC (permalink / raw)
  To: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt
  Cc: Mathieu Desnoyers, netdev, Hideo AOKI, Takashi Nishiie, Masami Hiramatsu

[-- Attachment #1: lttng-instrumentation-net.patch --]
[-- Type: text/plain, Size: 5590 bytes --]

Network core events.

Added tracepoints :

net_del_ifa_ipv4
net_dev_receive
net_dev_xmit
net_insert_ifa_ipv4
net_socket_call
net_socket_create
net_socket_recvmsg
net_socket_sendmsg

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: netdev@vger.kernel.org
CC: 'Peter Zijlstra' <peterz@infradead.org>
CC: "Frank Ch. Eigler" <fche@redhat.com>
CC: 'Ingo Molnar' <mingo@elte.hu>
CC: 'Hideo AOKI' <haoki@redhat.com>
CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
CC: 'Steven Rostedt' <rostedt@goodmis.org>
CC: Masami Hiramatsu <mhiramat@redhat.com>
---
 net/core/dev.c     |    4 ++++
 net/ipv4/devinet.c |    3 +++
 net/net-trace.h    |   40 ++++++++++++++++++++++++++++++++++++++++
 net/socket.c       |    7 +++++++
 4 files changed, 54 insertions(+)

Index: linux-2.6-lttng/net/core/dev.c
===================================================================
--- linux-2.6-lttng.orig/net/core/dev.c	2008-07-04 18:43:16.000000000 -0400
+++ linux-2.6-lttng/net/core/dev.c	2008-07-04 19:21:13.000000000 -0400
@@ -121,6 +121,7 @@
 #include <linux/if_arp.h>
 #include <linux/if_vlan.h>
 
+#include "../net-trace.h"
 #include "net-sysfs.h"
 
 /*
@@ -1669,6 +1670,8 @@ int dev_queue_xmit(struct sk_buff *skb)
 	}
 
 gso:
+	trace_net_dev_xmit(skb);
+
 	spin_lock_prefetch(&dev->queue_lock);
 
 	/* Disable soft irqs for various locks below. Also
@@ -2069,6 +2072,7 @@ int netif_receive_skb(struct sk_buff *sk
 
 	__get_cpu_var(netdev_rx_stat).total++;
 
+	trace_net_dev_receive(skb);
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
 	skb->mac_len = skb->network_header - skb->mac_header;
Index: linux-2.6-lttng/net/ipv4/devinet.c
===================================================================
--- linux-2.6-lttng.orig/net/ipv4/devinet.c	2008-07-04 18:43:16.000000000 -0400
+++ linux-2.6-lttng/net/ipv4/devinet.c	2008-07-04 19:21:13.000000000 -0400
@@ -63,6 +63,7 @@
 #include <net/ip_fib.h>
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
+#include "../net-trace.h"
 
 static struct ipv4_devconf ipv4_devconf = {
 	.data = {
@@ -257,6 +258,7 @@ static void __inet_del_ifa(struct in_dev
 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
 
 		while ((ifa = *ifap1) != NULL) {
+			trace_net_del_ifa_ipv4(ifa);
 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
 			    ifa1->ifa_scope <= ifa->ifa_scope)
 				last_prim = ifa;
@@ -363,6 +365,7 @@ static int __inet_insert_ifa(struct in_i
 			}
 			ifa->ifa_flags |= IFA_F_SECONDARY;
 		}
+		trace_net_insert_ifa_ipv4(ifa);
 	}
 
 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
Index: linux-2.6-lttng/net/socket.c
===================================================================
--- linux-2.6-lttng.orig/net/socket.c	2008-07-04 18:43:16.000000000 -0400
+++ linux-2.6-lttng/net/socket.c	2008-07-04 19:21:13.000000000 -0400
@@ -93,6 +93,7 @@
 
 #include <net/sock.h>
 #include <linux/netfilter.h>
+#include "net-trace.h"
 
 static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
 static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
@@ -572,6 +573,7 @@ int sock_sendmsg(struct socket *sock, st
 	ret = __sock_sendmsg(&iocb, sock, msg, size);
 	if (-EIOCBQUEUED == ret)
 		ret = wait_on_sync_kiocb(&iocb);
+	trace_net_socket_sendmsg(sock, msg, size, ret);
 	return ret;
 }
 
@@ -651,10 +653,12 @@ int sock_recvmsg(struct socket *sock, st
 	int ret;
 
 	init_sync_kiocb(&iocb, NULL);
+
 	iocb.private = &siocb;
 	ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
 	if (-EIOCBQUEUED == ret)
 		ret = wait_on_sync_kiocb(&iocb);
+	trace_net_socket_recvmsg(sock, msg, size, flags, ret);
 	return ret;
 }
 
@@ -1226,6 +1230,7 @@ asmlinkage long sys_socket(int family, i
 	if (retval < 0)
 		goto out_release;
 
+	trace_net_socket_create(sock, retval);
 out:
 	/* It may be already another descriptor 8) Not kernel problem. */
 	return retval;
@@ -2024,6 +2029,8 @@ asmlinkage long sys_socketcall(int call,
 	a0 = a[0];
 	a1 = a[1];
 
+	trace_net_socket_call(call, a0);
+
 	switch (call) {
 	case SYS_SOCKET:
 		err = sys_socket(a0, a1, a[2]);
Index: linux-2.6-lttng/net/net-trace.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/net/net-trace.h	2008-07-04 19:21:13.000000000 -0400
@@ -0,0 +1,40 @@
+#ifndef _NET_TRACE_H
+#define _NET_TRACE_H
+
+#include <linux/net.h>
+#include <linux/inetdevice.h>
+#include <net/sock.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(net_dev_xmit,
+	TPPROTO(struct sk_buff *skb),
+	TPARGS(skb));
+DEFINE_TRACE(net_dev_receive,
+	TPPROTO(struct sk_buff *skb),
+	TPARGS(skb));
+DEFINE_TRACE(net_del_ifa_ipv4,
+	TPPROTO(struct in_ifaddr *ifa),
+	TPARGS(ifa));
+DEFINE_TRACE(net_insert_ifa_ipv4,
+	TPPROTO(struct in_ifaddr *ifa),
+	TPARGS(ifa));
+DEFINE_TRACE(net_socket_sendmsg,
+	TPPROTO(struct socket *sock, struct msghdr *msg, size_t size, int ret),
+	TPARGS(sock, msg, size, ret));
+DEFINE_TRACE(net_socket_recvmsg,
+	TPPROTO(struct socket *sock, struct msghdr *msg, size_t size, int flags,
+		int ret),
+	TPARGS(sock, msg, size, flags, ret));
+DEFINE_TRACE(net_socket_create,
+	TPPROTO(struct socket *sock, int fd),
+	TPARGS(sock, fd));
+/*
+ * net_socket_call
+ *
+ * TODO : This tracepoint should be expanded to cover each element of the
+ * switch in sys_socketcall().
+ */
+DEFINE_TRACE(net_socket_call,
+	TPPROTO(int call, unsigned long a0),
+	TPARGS(call, a0));
+#endif

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [RFC patch 07/12] Traceprobes
  2008-07-04 23:52 [RFC patch 00/12] Tracepoints v2 Mathieu Desnoyers
                   ` (5 preceding siblings ...)
  2008-07-04 23:52 ` [RFC patch 06/12] LTTng instrumentation net Mathieu Desnoyers
@ 2008-07-04 23:52 ` Mathieu Desnoyers
  2008-07-07 16:28   ` Masami Hiramatsu
  2008-07-04 23:52 ` [RFC patch 08/12] LTTng instrumentation FS tracepoint probes Mathieu Desnoyers
                   ` (5 subsequent siblings)
  12 siblings, 1 reply; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-04 23:52 UTC (permalink / raw)
  To: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt
  Cc: Mathieu Desnoyers, Alexander Viro, Hideo AOKI, Takashi Nishiie,
	Masami Hiramatsu

[-- Attachment #1: traceprobes.patch --]
[-- Type: text/plain, Size: 1390 bytes --]

Menu option to activate tracing probes.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Alexander Viro <viro@zeniv.linux.org.uk>
CC: 'Peter Zijlstra' <peterz@infradead.org>
CC: "Frank Ch. Eigler" <fche@redhat.com>
CC: 'Ingo Molnar' <mingo@elte.hu>
CC: 'Hideo AOKI' <haoki@redhat.com>
CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
CC: 'Steven Rostedt' <rostedt@goodmis.org>
CC: Masami Hiramatsu <mhiramat@redhat.com>
---
 init/Kconfig |    9 +++++++++
 1 file changed, 9 insertions(+)

Index: linux-2.6-lttng/init/Kconfig
===================================================================
--- linux-2.6-lttng.orig/init/Kconfig	2008-07-04 09:32:52.000000000 -0400
+++ linux-2.6-lttng/init/Kconfig	2008-07-04 09:33:05.000000000 -0400
@@ -795,6 +795,15 @@ config MARKERS
 	  Place an empty function call at each marker site. Can be
 	  dynamically changed for a probe function.
 
+config TRACEPROBES
+	tristate "Compile generic tracing probes"
+	depends on MARKERS
+	default y
+	help
+	  Compile generic tracing probes, which connect to the tracepoints when
+	  loaded and format the information collected by the tracepoints with
+	  the Markers.
+
 source "arch/Kconfig"
 
 config PROC_PAGE_MONITOR

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [RFC patch 08/12] LTTng instrumentation FS tracepoint probes
  2008-07-04 23:52 [RFC patch 00/12] Tracepoints v2 Mathieu Desnoyers
                   ` (6 preceding siblings ...)
  2008-07-04 23:52 ` [RFC patch 07/12] Traceprobes Mathieu Desnoyers
@ 2008-07-04 23:52 ` Mathieu Desnoyers
  2008-07-04 23:52 ` [RFC patch 09/12] LTTng instrumentation ipc " Mathieu Desnoyers
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-04 23:52 UTC (permalink / raw)
  To: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt
  Cc: Mathieu Desnoyers, Alexander Viro, Hideo AOKI, Takashi Nishiie,
	Masami Hiramatsu

[-- Attachment #1: lttng-instrumentation-fs-tracepoints-probes.patch --]
[-- Type: text/plain, Size: 6136 bytes --]

Create a module which declares FS tracepoint probes, using markers.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Alexander Viro <viro@zeniv.linux.org.uk>
CC: 'Peter Zijlstra' <peterz@infradead.org>
CC: "Frank Ch. Eigler" <fche@redhat.com>
CC: 'Ingo Molnar' <mingo@elte.hu>
CC: 'Hideo AOKI' <haoki@redhat.com>
CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
CC: 'Steven Rostedt' <rostedt@goodmis.org>
CC: Masami Hiramatsu <mhiramat@redhat.com>
---
 fs/Makefile   |    1 
 fs/fs-trace.c |  167 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 168 insertions(+)

Index: linux-2.6-lttng/fs/fs-trace.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/fs/fs-trace.c	2008-07-04 19:30:42.000000000 -0400
@@ -0,0 +1,167 @@
+/*
+ * fs/fs-trace.c
+ *
+ * FS tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/buffer_head.h>
+#include "fs-trace.h"
+
+static void probe_fs_buffer_wait_start(struct buffer_head *bh)
+{
+	trace_mark(fs_buffer_wait_start, "bh %p", bh);
+}
+
+static void probe_fs_buffer_wait_end(struct buffer_head *bh)
+{
+	trace_mark(fs_buffer_wait_end, "bh %p", bh);
+}
+
+static void probe_fs_exec(char *filename)
+{
+	trace_mark(fs_exec, "filename %s", filename);
+}
+
+static void probe_fs_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+	trace_mark(fs_ioctl, "fd %u cmd %u arg %lu", fd, cmd, arg);
+}
+
+static void probe_fs_open(int fd, char *filename)
+{
+	trace_mark(fs_open, "fd %d filename %s", fd, filename);
+}
+
+static void probe_fs_close(unsigned int fd)
+{
+	trace_mark(fs_close, "fd %u", fd);
+}
+
+static void probe_fs_lseek(unsigned int fd, long offset, unsigned int origin)
+{
+	trace_mark(fs_lseek, "fd %u offset %ld origin %u", fd, offset, origin);
+}
+
+static void probe_fs_llseek(unsigned int fd, loff_t offset, unsigned int origin)
+{
+	trace_mark(fs_llseek, "fd %u offset %lld origin %u", fd,
+			(long long)offset, origin);
+}
+
+static void probe_fs_read(unsigned int fd, char __user *buf, size_t count,
+		ssize_t ret)
+{
+	trace_mark(fs_read, "fd %u count %zu", fd, count);
+}
+
+static void probe_fs_write(unsigned int fd, const char __user *buf,
+		size_t count, ssize_t ret)
+{
+	trace_mark(fs_write, "fd %u count %zu", fd, count);
+}
+
+static void probe_fs_pread64(unsigned int fd, char __user *buf, size_t count,
+		loff_t pos, ssize_t ret)
+{
+	trace_mark(fs_pread64, "fd %u count %zu pos %llu",
+			fd, count, (unsigned long long)pos);
+}
+
+static void probe_fs_pwrite64(unsigned int fd, const char __user *buf,
+		size_t count, loff_t pos, ssize_t ret)
+{
+	trace_mark(fs_pwrite64, "fd %u count %zu pos %llu",
+			fd, count, (unsigned long long)pos);
+}
+
+static void probe_fs_readv(unsigned long fd, const struct iovec __user *vec,
+		unsigned long vlen, ssize_t ret)
+{
+	trace_mark(fs_readv, "fd %lu vlen %lu", fd, vlen);
+}
+
+static void probe_fs_writev(unsigned long fd, const struct iovec __user *vec,
+		unsigned long vlen, ssize_t ret)
+{
+	trace_mark(fs_writev, "fd %lu vlen %lu", fd, vlen);
+}
+
+static void probe_fs_select(int fd, s64 timeout)
+{
+	trace_mark(fs_select, "fd %d timeout #8d%lld", fd, (long long)timeout);
+}
+
+static void probe_fs_poll(int fd)
+{
+	trace_mark(fs_pollfd, "fd %d", fd);
+}
+
+
+int __init fs_trace_init(void)
+{
+	int ret;
+
+	ret = register_trace_fs_buffer_wait_start(probe_fs_buffer_wait_start);
+	WARN_ON(ret);
+	ret = register_trace_fs_buffer_wait_end(probe_fs_buffer_wait_end);
+	WARN_ON(ret);
+	ret = register_trace_fs_exec(probe_fs_exec);
+	WARN_ON(ret);
+	ret = register_trace_fs_ioctl(probe_fs_ioctl);
+	WARN_ON(ret);
+	ret = register_trace_fs_open(probe_fs_open);
+	WARN_ON(ret);
+	ret = register_trace_fs_close(probe_fs_close);
+	WARN_ON(ret);
+	ret = register_trace_fs_lseek(probe_fs_lseek);
+	WARN_ON(ret);
+	ret = register_trace_fs_llseek(probe_fs_llseek);
+	WARN_ON(ret);
+	ret = register_trace_fs_read(probe_fs_read);
+	WARN_ON(ret);
+	ret = register_trace_fs_write(probe_fs_write);
+	WARN_ON(ret);
+	ret = register_trace_fs_pread64(probe_fs_pread64);
+	WARN_ON(ret);
+	ret = register_trace_fs_pwrite64(probe_fs_pwrite64);
+	WARN_ON(ret);
+	ret = register_trace_fs_readv(probe_fs_readv);
+	WARN_ON(ret);
+	ret = register_trace_fs_writev(probe_fs_writev);
+	WARN_ON(ret);
+	ret = register_trace_fs_select(probe_fs_select);
+	WARN_ON(ret);
+	ret = register_trace_fs_poll(probe_fs_poll);
+	WARN_ON(ret);
+
+	return 0;
+}
+
+module_init(fs_trace_init);
+
+void __exit fs_trace_exit(void)
+{
+	unregister_trace_fs_poll(probe_fs_poll);
+	unregister_trace_fs_select(probe_fs_select);
+	unregister_trace_fs_writev(probe_fs_writev);
+	unregister_trace_fs_readv(probe_fs_readv);
+	unregister_trace_fs_pwrite64(probe_fs_pwrite64);
+	unregister_trace_fs_pread64(probe_fs_pread64);
+	unregister_trace_fs_write(probe_fs_write);
+	unregister_trace_fs_read(probe_fs_read);
+	unregister_trace_fs_llseek(probe_fs_llseek);
+	unregister_trace_fs_lseek(probe_fs_lseek);
+	unregister_trace_fs_close(probe_fs_close);
+	unregister_trace_fs_open(probe_fs_open);
+	unregister_trace_fs_ioctl(probe_fs_ioctl);
+	unregister_trace_fs_exec(probe_fs_exec);
+	unregister_trace_fs_buffer_wait_end(probe_fs_buffer_wait_end);
+	unregister_trace_fs_buffer_wait_start(probe_fs_buffer_wait_start);
+}
+
+module_exit(fs_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("FS Tracepoint Probes");
Index: linux-2.6-lttng/fs/Makefile
===================================================================
--- linux-2.6-lttng.orig/fs/Makefile	2008-07-04 19:21:32.000000000 -0400
+++ linux-2.6-lttng/fs/Makefile	2008-07-04 19:26:08.000000000 -0400
@@ -63,6 +63,7 @@ obj-$(CONFIG_CONFIGFS_FS)	+= configfs/
 obj-y				+= devpts/
 
 obj-$(CONFIG_PROFILING)		+= dcookies.o
+obj-$(CONFIG_TRACEPROBES)	+= fs-trace.o
 obj-$(CONFIG_DLM)		+= dlm/
  
 # Do not add any filesystems before this line

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [RFC patch 09/12] LTTng instrumentation ipc tracepoint probes
  2008-07-04 23:52 [RFC patch 00/12] Tracepoints v2 Mathieu Desnoyers
                   ` (7 preceding siblings ...)
  2008-07-04 23:52 ` [RFC patch 08/12] LTTng instrumentation FS tracepoint probes Mathieu Desnoyers
@ 2008-07-04 23:52 ` Mathieu Desnoyers
  2008-07-04 23:52 ` [RFC patch 10/12] LTTng instrumentation kernel " Mathieu Desnoyers
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-04 23:52 UTC (permalink / raw)
  To: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt
  Cc: Mathieu Desnoyers, Alexander Viro, Hideo AOKI, Takashi Nishiie,
	Masami Hiramatsu

[-- Attachment #1: lttng-instrumentation-ipc-tracepoints-probes.patch --]
[-- Type: text/plain, Size: 2694 bytes --]

Create a module which declares ipc tracepoint probes, using markers.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Alexander Viro <viro@zeniv.linux.org.uk>
CC: 'Peter Zijlstra' <peterz@infradead.org>
CC: "Frank Ch. Eigler" <fche@redhat.com>
CC: 'Ingo Molnar' <mingo@elte.hu>
CC: 'Hideo AOKI' <haoki@redhat.com>
CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
CC: 'Steven Rostedt' <rostedt@goodmis.org>
CC: Masami Hiramatsu <mhiramat@redhat.com>
---
 ipc/Makefile    |    2 +-
 ipc/ipc-trace.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)

Index: linux-2.6-lttng/ipc/Makefile
===================================================================
--- linux-2.6-lttng.orig/ipc/Makefile	2008-07-04 10:04:27.000000000 -0400
+++ linux-2.6-lttng/ipc/Makefile	2008-07-04 10:04:32.000000000 -0400
@@ -8,4 +8,4 @@ obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysc
 obj_mq-$(CONFIG_COMPAT) += compat_mq.o
 obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y)
 obj-$(CONFIG_IPC_NS) += namespace.o
-
+obj-$(CONFIG_TRACEPROBES) += ipc-trace.o
Index: linux-2.6-lttng/ipc/ipc-trace.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/ipc/ipc-trace.c	2008-07-04 10:15:10.000000000 -0400
@@ -0,0 +1,52 @@
+/*
+ * ipc/ipc-trace.c
+ *
+ * IPC tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include "ipc-trace.h"
+
+static void probe_ipc_msg_create(long id, int flags)
+{
+	trace_mark(ipc_msg_create, "id %ld flags %d", id, flags);
+}
+
+static void probe_ipc_sem_create(long id, int flags)
+{
+	trace_mark(ipc_sem_create, "id %ld flags %d", id, flags);
+}
+
+static void probe_ipc_shm_create(long id, int flags)
+{
+	trace_mark(ipc_shm_create, "id %ld flags %d", id, flags);
+}
+
+int __init ipc_trace_init(void)
+{
+	int ret;
+
+	ret = register_trace_ipc_msg_create(probe_ipc_msg_create);
+	WARN_ON(ret);
+	ret = register_trace_ipc_sem_create(probe_ipc_sem_create);
+	WARN_ON(ret);
+	ret = register_trace_ipc_shm_create(probe_ipc_shm_create);
+	WARN_ON(ret);
+
+	return 0;
+}
+
+module_init(ipc_trace_init);
+
+void __exit ipc_trace_exit(void)
+{
+	unregister_trace_ipc_shm_create(probe_ipc_shm_create);
+	unregister_trace_ipc_sem_create(probe_ipc_sem_create);
+	unregister_trace_ipc_msg_create(probe_ipc_msg_create);
+}
+
+module_exit(ipc_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("IPC Tracepoint Probes");

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [RFC patch 10/12] LTTng instrumentation kernel tracepoint probes
  2008-07-04 23:52 [RFC patch 00/12] Tracepoints v2 Mathieu Desnoyers
                   ` (8 preceding siblings ...)
  2008-07-04 23:52 ` [RFC patch 09/12] LTTng instrumentation ipc " Mathieu Desnoyers
@ 2008-07-04 23:52 ` Mathieu Desnoyers
  2008-07-04 23:52 ` [RFC patch 11/12] LTTng instrumentation mm " Mathieu Desnoyers
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-04 23:52 UTC (permalink / raw)
  To: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt
  Cc: Mathieu Desnoyers, Alexander Viro, Hideo AOKI, Takashi Nishiie,
	Masami Hiramatsu

[-- Attachment #1: lttng-instrumentation-kernel-tracepoints-probes.patch --]
[-- Type: text/plain, Size: 12474 bytes --]

Create a module which declares kernel tracepoint probes, using markers.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Alexander Viro <viro@zeniv.linux.org.uk>
CC: 'Peter Zijlstra' <peterz@infradead.org>
CC: "Frank Ch. Eigler" <fche@redhat.com>
CC: 'Ingo Molnar' <mingo@elte.hu>
CC: 'Hideo AOKI' <haoki@redhat.com>
CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
CC: 'Steven Rostedt' <rostedt@goodmis.org>
CC: Masami Hiramatsu <mhiramat@redhat.com>
---
 kernel/Makefile       |    1 
 kernel/kernel-trace.c |  345 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 346 insertions(+)

Index: linux-2.6-lttng/kernel/Makefile
===================================================================
--- linux-2.6-lttng.orig/kernel/Makefile	2008-07-04 16:09:41.000000000 -0400
+++ linux-2.6-lttng/kernel/Makefile	2008-07-04 16:10:10.000000000 -0400
@@ -69,6 +69,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayac
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 obj-$(CONFIG_MARKERS) += marker.o
 obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
+obj-$(CONFIG_TRACEPROBES) += kernel-trace.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
Index: linux-2.6-lttng/kernel/kernel-trace.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/kernel/kernel-trace.c	2008-07-04 16:58:32.000000000 -0400
@@ -0,0 +1,345 @@
+/*
+ * kernel/kernel-trace.c
+ *
+ * kernel tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include "kernel-trace.h"
+
+static void probe_kernel_irq_entry(unsigned int id, struct pt_regs *regs)
+{
+	trace_mark(kernel_irq_entry, "irq_id %u kernel_mode %u", id,
+		regs ? !user_mode(regs) : 1);	/* no regs: report kernel mode */
+}
+
+static void probe_kernel_irq_exit(void)
+{
+	trace_mark(kernel_irq_exit, MARK_NOARGS);
+}
+
+static void probe_kernel_timer_itimer_expired(struct signal_struct *sig)
+{
+	trace_mark(kernel_timer_itimer_expired, "pid %d",
+		pid_nr(sig->leader_pid));
+}
+
+static void probe_kernel_timer_itimer_set(int which, struct itimerval *value)
+{
+	trace_mark(kernel_timer_itimer_set,
+		"which %d interval_sec %ld interval_usec %ld "
+		"value_sec %ld value_usec %ld",
+		which,
+		value->it_interval.tv_sec,
+		value->it_interval.tv_usec,
+		value->it_value.tv_sec,
+		value->it_value.tv_usec);
+}
+
+static void probe_kernel_kthread_stop(struct task_struct *t)
+{
+	trace_mark(kernel_kthread_stop, "pid %d", t->pid);
+}
+
+static void probe_kernel_kthread_stop_ret(int ret)
+{
+	trace_mark(kernel_kthread_stop_ret, "ret %d", ret);
+}
+
+static void probe_kernel_printk(void *retaddr)
+{
+	trace_mark(kernel_printk, "ip %p", retaddr);
+}
+
+static void probe_kernel_vprintk(void *retaddr, char *buf, int len)
+{
+	if (len > 0) {		/* empty messages are not traced */
+		unsigned int loglevel;
+		int mark_len;
+		char *mark_buf;
+		char saved_char;
+
+		if (len >= 3 && buf[0] == '<' && buf[1] >= '0' &&
+		   buf[1] <= '7' && buf[2] == '>') {	/* "<N>" prefix */
+			loglevel = buf[1] - '0';
+			mark_buf = &buf[3];	/* trace the text after it */
+			mark_len = len - 3;
+		} else {
+			loglevel = default_message_loglevel;
+			mark_buf = buf;
+			mark_len = len;
+		}
+		if (mark_buf[mark_len - 1] == '\n')
+			mark_len--;	/* drop the trailing newline */
+		saved_char = mark_buf[mark_len];
+		mark_buf[mark_len] = '\0';	/* terminate in place for %s */
+		trace_mark(kernel_vprintk, "loglevel #1u%u string %s ip %p",
+			loglevel, mark_buf, retaddr);
+		mark_buf[mark_len] = saved_char;	/* restore the buffer */
+	}
+}
+
+static void probe_kernel_sched_wait_task(struct task_struct *p)
+{
+	trace_mark(kernel_sched_wait_task, "pid %d state %ld",
+		p->pid, p->state);
+}
+
+static void probe_kernel_sched_try_wakeup(struct task_struct *p)
+{
+	trace_mark(kernel_sched_try_wakeup, "pid %d state %ld",
+		p->pid, p->state);
+}
+
+static void probe_kernel_sched_wakeup_new_task(struct task_struct *p)
+{
+	trace_mark(kernel_sched_wakeup_new_task, "pid %d state %ld",
+		p->pid, p->state);
+}
+
+static void probe_kernel_sched_schedule(struct task_struct *prev,
+		struct task_struct *next)
+{
+	trace_mark(kernel_sched_schedule,
+		"prev_pid %d next_pid %d prev_state %ld",
+		prev->pid, next->pid, prev->state);
+}
+
+static void probe_kernel_sched_migrate_task(struct task_struct *p, int dest_cpu)
+{
+	trace_mark(kernel_sched_migrate_task, "pid %d state %ld dest_cpu %d",
+		p->pid, p->state, dest_cpu);
+}
+
+static void probe_kernel_signal_send(int sig, struct task_struct *p)
+{
+	trace_mark(kernel_send_signal, "pid %d signal %d", p->pid, sig);
+}
+
+static void probe_kernel_softirq_entry(struct softirq_action *h,
+	struct softirq_action *softirq_vec)
+{
+	trace_mark(kernel_softirq_entry, "softirq_id %lu",
+		((unsigned long)h - (unsigned long)softirq_vec) / sizeof(*h));
+}
+
+static void probe_kernel_softirq_exit(struct softirq_action *h,
+	struct softirq_action *softirq_vec)
+{
+	trace_mark(kernel_softirq_exit, "softirq_id %lu",
+		((unsigned long)h - (unsigned long)softirq_vec) / sizeof(*h));
+}
+
+static void probe_kernel_softirq_raise(unsigned int nr)
+{
+	trace_mark(kernel_softirq_raise, "softirq_id %u", nr);
+}
+
+static void probe_kernel_tasklet_low_entry(struct tasklet_struct *t)
+{
+	trace_mark(kernel_tasklet_low_entry, "func %p data %lu",
+		t->func, t->data);
+}
+
+static void probe_kernel_tasklet_low_exit(struct tasklet_struct *t)
+{
+	trace_mark(kernel_tasklet_low_exit, "func %p data %lu",
+		t->func, t->data);
+}
+
+static void probe_kernel_tasklet_high_entry(struct tasklet_struct *t)
+{
+	trace_mark(kernel_tasklet_high_entry, "func %p data %lu",
+		t->func, t->data);
+}
+
+static void probe_kernel_tasklet_high_exit(struct tasklet_struct *t)
+{
+	trace_mark(kernel_tasklet_high_exit, "func %p data %lu",
+		t->func, t->data);
+}
+
+static void probe_kernel_timer_set(struct timer_list *timer)
+{
+	trace_mark(kernel_timer_set, "expires %lu function %p data %lu",
+		timer->expires, timer->function, timer->data);
+}
+
+static void probe_kernel_timer_update_time(void)
+{
+	trace_mark(kernel_timer_update_time,
+		"jiffies #8u%llu xtime_sec %ld xtime_nsec %ld "
+		"walltomonotonic_sec %ld walltomonotonic_nsec %ld",
+		(unsigned long long)jiffies_64, xtime.tv_sec, xtime.tv_nsec,
+		wall_to_monotonic.tv_sec, wall_to_monotonic.tv_nsec);
+}
+
+static void probe_kernel_timer_timeout(struct task_struct *p)
+{
+	trace_mark(kernel_timer_timeout, "pid %d", p->pid);
+}
+
+static void probe_kernel_process_free(struct task_struct *p)
+{
+	trace_mark(kernel_process_free, "pid %d", p->pid);
+}
+
+static void probe_kernel_process_exit(struct task_struct *p)
+{
+	trace_mark(kernel_process_exit, "pid %d", p->pid);
+}
+
+static void probe_kernel_process_wait(struct pid *pid)
+{
+	trace_mark(kernel_process_wait, "pid %d", pid_nr(pid));
+}
+
+static void probe_kernel_process_fork(struct task_struct *parent,
+		struct task_struct *child)
+{
+	trace_mark(kernel_process_fork,
+		"parent_pid %d child_pid %d child_tgid %d",
+		parent->pid, child->pid, child->tgid);
+}
+
+static void probe_kernel_module_free(struct module *mod)
+{
+	trace_mark(kernel_module_free, "name %s", mod->name);
+}
+
+static void probe_kernel_module_load(struct module *mod)
+{
+	trace_mark(kernel_module_load, "name %s", mod->name);
+}
+
+int __init kernel_trace_init(void)
+{
+	int ret;
+
+	ret = register_trace_kernel_irq_entry(probe_kernel_irq_entry);
+	WARN_ON(ret);
+	ret = register_trace_kernel_irq_exit(probe_kernel_irq_exit);
+	WARN_ON(ret);
+	ret = register_trace_kernel_timer_itimer_expired(
+		probe_kernel_timer_itimer_expired);
+	WARN_ON(ret);
+	ret = register_trace_kernel_timer_itimer_set(
+		probe_kernel_timer_itimer_set);
+	WARN_ON(ret);
+	ret = register_trace_kernel_kthread_stop(probe_kernel_kthread_stop);
+	WARN_ON(ret);
+	ret = register_trace_kernel_kthread_stop_ret(
+		probe_kernel_kthread_stop_ret);
+	WARN_ON(ret);
+	ret = register_trace_kernel_printk(probe_kernel_printk);
+	WARN_ON(ret);
+	ret = register_trace_kernel_vprintk(probe_kernel_vprintk);
+	WARN_ON(ret);
+	ret = register_trace_kernel_sched_wait_task(
+		probe_kernel_sched_wait_task);
+	WARN_ON(ret);
+	ret = register_trace_kernel_sched_try_wakeup(
+		probe_kernel_sched_try_wakeup);
+	WARN_ON(ret);
+	ret = register_trace_kernel_sched_wakeup_new_task(
+		probe_kernel_sched_wakeup_new_task);
+	WARN_ON(ret);
+	ret = register_trace_kernel_sched_schedule(
+		probe_kernel_sched_schedule);
+	WARN_ON(ret);
+	ret = register_trace_kernel_sched_migrate_task(
+		probe_kernel_sched_migrate_task);
+	WARN_ON(ret);
+	ret = register_trace_kernel_signal_send(probe_kernel_signal_send);
+	WARN_ON(ret);
+	ret = register_trace_kernel_softirq_entry(probe_kernel_softirq_entry);
+	WARN_ON(ret);
+	ret = register_trace_kernel_softirq_exit(probe_kernel_softirq_exit);
+	WARN_ON(ret);
+	ret = register_trace_kernel_softirq_raise(probe_kernel_softirq_raise);
+	WARN_ON(ret);
+	ret = register_trace_kernel_tasklet_low_entry(
+		probe_kernel_tasklet_low_entry);
+	WARN_ON(ret);
+	ret = register_trace_kernel_tasklet_low_exit(
+		probe_kernel_tasklet_low_exit);
+	WARN_ON(ret);
+	ret = register_trace_kernel_tasklet_high_entry(
+		probe_kernel_tasklet_high_entry);
+	WARN_ON(ret);
+	ret = register_trace_kernel_tasklet_high_exit(
+		probe_kernel_tasklet_high_exit);
+	WARN_ON(ret);
+	ret = register_trace_kernel_timer_set(probe_kernel_timer_set);
+	WARN_ON(ret);
+	ret = register_trace_kernel_timer_update_time(
+		probe_kernel_timer_update_time);
+	WARN_ON(ret);
+	ret = register_trace_kernel_timer_timeout(probe_kernel_timer_timeout);
+	WARN_ON(ret);
+	ret = register_trace_kernel_process_free(probe_kernel_process_free);
+	WARN_ON(ret);
+	ret = register_trace_kernel_process_exit(probe_kernel_process_exit);
+	WARN_ON(ret);
+	ret = register_trace_kernel_process_wait(probe_kernel_process_wait);
+	WARN_ON(ret);
+	ret = register_trace_kernel_process_fork(probe_kernel_process_fork);
+	WARN_ON(ret);
+	ret = register_trace_kernel_module_free(probe_kernel_module_free);
+	WARN_ON(ret);
+	ret = register_trace_kernel_module_load(probe_kernel_module_load);
+	WARN_ON(ret);
+
+	return 0;
+}
+
+module_init(kernel_trace_init);
+
+void __exit kernel_trace_exit(void)
+{
+	unregister_trace_kernel_module_load(probe_kernel_module_load);
+	unregister_trace_kernel_module_free(probe_kernel_module_free);
+	unregister_trace_kernel_process_fork(probe_kernel_process_fork);
+	unregister_trace_kernel_process_wait(probe_kernel_process_wait);
+	unregister_trace_kernel_process_exit(probe_kernel_process_exit);
+	unregister_trace_kernel_process_free(probe_kernel_process_free);
+	unregister_trace_kernel_timer_timeout(probe_kernel_timer_timeout);
+	unregister_trace_kernel_timer_update_time(
+		probe_kernel_timer_update_time);
+	unregister_trace_kernel_timer_set(probe_kernel_timer_set);
+	unregister_trace_kernel_tasklet_high_exit(
+		probe_kernel_tasklet_high_exit);
+	unregister_trace_kernel_tasklet_high_entry(
+		probe_kernel_tasklet_high_entry);
+	unregister_trace_kernel_tasklet_low_exit(
+		probe_kernel_tasklet_low_exit);
+	unregister_trace_kernel_tasklet_low_entry(
+		probe_kernel_tasklet_low_entry);
+	unregister_trace_kernel_softirq_raise(probe_kernel_softirq_raise);
+	unregister_trace_kernel_softirq_exit(probe_kernel_softirq_exit);
+	unregister_trace_kernel_softirq_entry(probe_kernel_softirq_entry);
+	unregister_trace_kernel_signal_send(probe_kernel_signal_send);
+	unregister_trace_kernel_sched_migrate_task(
+		probe_kernel_sched_migrate_task);
+	unregister_trace_kernel_sched_schedule(probe_kernel_sched_schedule);
+	unregister_trace_kernel_sched_wakeup_new_task(
+		probe_kernel_sched_wakeup_new_task);
+	unregister_trace_kernel_sched_try_wakeup(
+		probe_kernel_sched_try_wakeup);
+	unregister_trace_kernel_sched_wait_task(probe_kernel_sched_wait_task);
+	unregister_trace_kernel_vprintk(probe_kernel_vprintk);
+	unregister_trace_kernel_printk(probe_kernel_printk);
+	unregister_trace_kernel_kthread_stop_ret(probe_kernel_kthread_stop_ret);
+	unregister_trace_kernel_kthread_stop(probe_kernel_kthread_stop);
+	unregister_trace_kernel_timer_itimer_set(probe_kernel_timer_itimer_set);
+	unregister_trace_kernel_timer_itimer_expired(
+		probe_kernel_timer_itimer_expired);
+	unregister_trace_kernel_irq_exit(probe_kernel_irq_exit);
+	unregister_trace_kernel_irq_entry(probe_kernel_irq_entry);
+}
+
+module_exit(kernel_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("kernel Tracepoint Probes");

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [RFC patch 11/12] LTTng instrumentation mm tracepoint probes
  2008-07-04 23:52 [RFC patch 00/12] Tracepoints v2 Mathieu Desnoyers
                   ` (9 preceding siblings ...)
  2008-07-04 23:52 ` [RFC patch 10/12] LTTng instrumentation kernel " Mathieu Desnoyers
@ 2008-07-04 23:52 ` Mathieu Desnoyers
  2008-07-04 23:52 ` [RFC patch 12/12] LTTng instrumentation net " Mathieu Desnoyers
  2008-07-05 23:27 ` [RFC patch 00/12] Tracepoints v2 Eduard - Gabriel Munteanu
  12 siblings, 0 replies; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-04 23:52 UTC (permalink / raw)
  To: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt
  Cc: Mathieu Desnoyers, Alexander Viro, Hideo AOKI, Takashi Nishiie,
	Masami Hiramatsu

[-- Attachment #1: lttng-instrumentation-mm-tracepoints-probes.patch --]
[-- Type: text/plain, Size: 6510 bytes --]

Create a module which declares mm tracepoint probes, using markers.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Alexander Viro <viro@zeniv.linux.org.uk>
CC: 'Peter Zijlstra' <peterz@infradead.org>
CC: "Frank Ch. Eigler" <fche@redhat.com>
CC: 'Ingo Molnar' <mingo@elte.hu>
CC: 'Hideo AOKI' <haoki@redhat.com>
CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
CC: 'Steven Rostedt' <rostedt@goodmis.org>
CC: Masami Hiramatsu <mhiramat@redhat.com>
---
 include/linux/swapops.h |    8 ++
 mm/Makefile             |    1 
 mm/mm-trace.c           |  150 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 159 insertions(+)

Index: linux-2.6-lttng/mm/Makefile
===================================================================
--- linux-2.6-lttng.orig/mm/Makefile	2008-07-04 18:26:42.000000000 -0400
+++ linux-2.6-lttng/mm/Makefile	2008-07-04 18:27:00.000000000 -0400
@@ -33,4 +33,5 @@ obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_SMP) += allocpercpu.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o
+obj-$(CONFIG_TRACEPROBES) += mm-trace.o
 
Index: linux-2.6-lttng/mm/mm-trace.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/mm/mm-trace.c	2008-07-04 18:27:00.000000000 -0400
@@ -0,0 +1,150 @@
+/*
+ * mm/mm-trace.c
+ *
+ * MM tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <linux/swapops.h>
+#include "mm-trace.h"
+
+static void probe_mm_filemap_wait_start(struct page *page, int bit_nr)
+{
+	trace_mark(mm_filemap_wait_start, "pfn %lu bit_nr %d",
+		page_to_pfn(page), bit_nr);
+}
+
+static void probe_mm_filemap_wait_end(struct page *page, int bit_nr)
+{
+	trace_mark(mm_filemap_wait_end, "pfn %lu bit_nr %d",
+		page_to_pfn(page), bit_nr);
+}
+
+static void probe_mm_huge_page_free(struct page *page)
+{
+	trace_mark(mm_huge_page_free, "pfn %lu", page_to_pfn(page));
+}
+
+static void probe_mm_huge_page_alloc(struct page *page)
+{
+	trace_mark(mm_huge_page_alloc, "pfn %lu", page_to_pfn(page));
+}
+
+#ifdef CONFIG_SWAP
+static void probe_mm_swap_in(struct page *page, swp_entry_t entry)
+{
+	trace_mark(mm_swap_in, "pfn %lu filp %p offset %lu",
+		page_to_pfn(page),
+		get_swap_info_struct(swp_type(entry))->swap_file,
+		swp_offset(entry));
+}
+#endif
+
+static void probe_mm_handle_fault_entry(unsigned long address, int write_access)
+{
+	trace_mark(mm_handle_fault_entry,
+		"address %lu ip #p%ld write_access %d",
+		address, KSTK_EIP(current), write_access);
+}
+
+static void probe_mm_handle_fault_exit(void)
+{
+	trace_mark(mm_handle_fault_exit, MARK_NOARGS);
+}
+
+static void probe_mm_page_free(struct page *page, unsigned int order)
+{
+	trace_mark(mm_page_free, "order %u pfn %lu", order, page_to_pfn(page));
+}
+
+static void probe_mm_page_alloc(struct page *page, unsigned int order)
+{
+	if (page)
+		trace_mark(mm_page_alloc, "order %u pfn %lu", order,
+			page_to_pfn(page));
+}
+
+static void probe_mm_swap_out(struct page *page)
+{
+	trace_mark(mm_swap_out, "pfn %lu filp %p offset %lu",
+		page_to_pfn(page),
+		get_swap_info_struct(swp_type(
+			page_swp_entry(page)))->swap_file,
+		swp_offset(page_swp_entry(page)));
+}
+
+static void probe_mm_swap_file_close(struct file *file)
+{
+	trace_mark(mm_swap_file_close, "filp %p", file);
+}
+
+static void probe_mm_swap_file_open(struct file *file, char *filename)
+{
+	trace_mark(mm_swap_file_open, "filp %p filename %s",
+		file, filename);
+}
+
+int __init mm_trace_init(void)
+{
+	int ret;
+
+	ret = register_trace_mm_filemap_wait_start(probe_mm_filemap_wait_start);
+	WARN_ON(ret);
+	ret = register_trace_mm_filemap_wait_end(probe_mm_filemap_wait_end);
+	WARN_ON(ret);
+	ret = register_trace_mm_huge_page_free(probe_mm_huge_page_free);
+	WARN_ON(ret);
+	ret = register_trace_mm_huge_page_alloc(probe_mm_huge_page_alloc);
+	WARN_ON(ret);
+#ifdef CONFIG_SWAP
+	ret = register_trace_mm_swap_in(probe_mm_swap_in);
+	WARN_ON(ret);
+#endif
+	ret = register_trace_mm_handle_fault_entry(probe_mm_handle_fault_entry);
+	WARN_ON(ret);
+	ret = register_trace_mm_handle_fault_exit(probe_mm_handle_fault_exit);
+	WARN_ON(ret);
+	ret = register_trace_mm_page_free(probe_mm_page_free);
+	WARN_ON(ret);
+	ret = register_trace_mm_page_alloc(probe_mm_page_alloc);
+	WARN_ON(ret);
+	ret = register_trace_mm_swap_out(probe_mm_swap_out);
+	WARN_ON(ret);
+	ret = register_trace_mm_swap_file_close(probe_mm_swap_file_close);
+	WARN_ON(ret);
+	ret = register_trace_mm_swap_file_open(probe_mm_swap_file_open);
+	WARN_ON(ret);
+
+	return 0;
+}
+
+module_init(mm_trace_init);
+
+void __exit mm_trace_exit(void)
+{
+	unregister_trace_mm_swap_file_open(probe_mm_swap_file_open);
+	unregister_trace_mm_swap_file_close(probe_mm_swap_file_close);
+	unregister_trace_mm_swap_out(probe_mm_swap_out);
+	unregister_trace_mm_page_alloc(probe_mm_page_alloc);
+	unregister_trace_mm_page_free(probe_mm_page_free);
+	unregister_trace_mm_handle_fault_exit(probe_mm_handle_fault_exit);
+	unregister_trace_mm_handle_fault_entry(probe_mm_handle_fault_entry);
+#ifdef CONFIG_SWAP
+	unregister_trace_mm_swap_in(probe_mm_swap_in);
+#endif
+	unregister_trace_mm_huge_page_alloc(probe_mm_huge_page_alloc);
+	unregister_trace_mm_huge_page_free(probe_mm_huge_page_free);
+	unregister_trace_mm_filemap_wait_end(probe_mm_filemap_wait_end);
+	unregister_trace_mm_filemap_wait_start(probe_mm_filemap_wait_start);
+}
+
+module_exit(mm_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("MM Tracepoint Probes");
Index: linux-2.6-lttng/include/linux/swapops.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/swapops.h	2008-07-04 18:26:02.000000000 -0400
+++ linux-2.6-lttng/include/linux/swapops.h	2008-07-04 18:27:00.000000000 -0400
@@ -76,6 +76,14 @@ static inline pte_t swp_entry_to_pte(swp
 	return __swp_entry_to_pte(arch_entry);
 }
 
+static inline swp_entry_t page_swp_entry(struct page *page)
+{
+	swp_entry_t entry;
+	VM_BUG_ON(!PageSwapCache(page));
+	entry.val = page_private(page);
+	return entry;
+}
+
 #ifdef CONFIG_MIGRATION
 static inline swp_entry_t make_migration_entry(struct page *page, int write)
 {

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [RFC patch 12/12] LTTng instrumentation net tracepoint probes
  2008-07-04 23:52 [RFC patch 00/12] Tracepoints v2 Mathieu Desnoyers
                   ` (10 preceding siblings ...)
  2008-07-04 23:52 ` [RFC patch 11/12] LTTng instrumentation mm " Mathieu Desnoyers
@ 2008-07-04 23:52 ` Mathieu Desnoyers
  2008-07-05 23:27 ` [RFC patch 00/12] Tracepoints v2 Eduard - Gabriel Munteanu
  12 siblings, 0 replies; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-04 23:52 UTC (permalink / raw)
  To: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt
  Cc: Mathieu Desnoyers, Alexander Viro, Hideo AOKI, Takashi Nishiie,
	Masami Hiramatsu

[-- Attachment #1: lttng-instrumentation-net-tracepoints-probes.patch --]
[-- Type: text/plain, Size: 4628 bytes --]

Create a module which declares net tracepoint probes, using markers.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Alexander Viro <viro@zeniv.linux.org.uk>
CC: 'Peter Zijlstra' <peterz@infradead.org>
CC: "Frank Ch. Eigler" <fche@redhat.com>
CC: 'Ingo Molnar' <mingo@elte.hu>
CC: 'Hideo AOKI' <haoki@redhat.com>
CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
CC: 'Steven Rostedt' <rostedt@goodmis.org>
CC: Masami Hiramatsu <mhiramat@redhat.com>
---
 net/Makefile    |    3 +
 net/net-trace.c |  105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 108 insertions(+)

Index: linux-2.6-lttng/net/Makefile
===================================================================
--- linux-2.6-lttng.orig/net/Makefile	2008-07-04 19:16:42.000000000 -0400
+++ linux-2.6-lttng/net/Makefile	2008-07-04 19:16:44.000000000 -0400
@@ -11,6 +11,9 @@ obj-$(CONFIG_NET)		:= socket.o core/
 
 tmp-$(CONFIG_COMPAT) 		:= compat.o
 obj-$(CONFIG_NET)		+= $(tmp-y)
+ifeq ($(CONFIG_NET),y)
+obj-$(CONFIG_TRACEPROBES)	+= net-trace.o
+endif
 
 # LLC has to be linked before the files in net/802/
 obj-$(CONFIG_LLC)		+= llc/
Index: linux-2.6-lttng/net/net-trace.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/net/net-trace.c	2008-07-04 19:19:04.000000000 -0400
@@ -0,0 +1,105 @@
+/*
+ * net/net-trace.c
+ *
+ * Net tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include "net-trace.h"
+
+static void probe_net_dev_xmit(struct sk_buff *skb)
+{
+	trace_mark(net_dev_xmit, "skb %p protocol #2u%hu", skb, skb->protocol);
+}
+
+static void probe_net_dev_receive(struct sk_buff *skb)
+{
+	trace_mark(net_dev_receive, "skb %p protocol #2u%hu",
+		skb, skb->protocol);
+}
+
+static void probe_net_del_ifa_ipv4(struct in_ifaddr *ifa)
+{
+	trace_mark(net_del_ifa_ipv4, "label %s", ifa->ifa_label);
+}
+
+static void probe_net_insert_ifa_ipv4(struct in_ifaddr *ifa)
+{
+	trace_mark(net_insert_ifa_ipv4, "label %s address #4u%lu",
+		ifa->ifa_label, (unsigned long)ifa->ifa_address);
+}
+
+static void probe_net_socket_sendmsg(struct socket *sock, struct msghdr *msg,
+		size_t size, int ret)
+{
+	trace_mark(net_socket_sendmsg,
+		"sock %p family %d type %d protocol %d size %zu",
+		sock, sock->sk->sk_family, sock->sk->sk_type,
+		sock->sk->sk_protocol, size);
+}
+
+static void probe_net_socket_recvmsg(struct socket *sock, struct msghdr *msg,
+		size_t size, int flags, int ret)
+{
+	trace_mark(net_socket_recvmsg,
+		"sock %p family %d type %d protocol %d size %zu",
+		sock, sock->sk->sk_family, sock->sk->sk_type,
+		sock->sk->sk_protocol, size);
+}
+
+static void probe_net_socket_create(struct socket *sock, int fd)
+{
+	trace_mark(net_socket_create,
+		"sock %p family %d type %d protocol %d fd %d",
+		sock, sock->sk->sk_family, sock->sk->sk_type,
+		sock->sk->sk_protocol, fd);
+}
+
+static void probe_net_socket_call(int call, unsigned long a0)
+{
+	trace_mark(net_socket_call, "call %d a0 %lu", call, a0);
+}
+
+int __init net_trace_init(void)
+{
+	int ret;
+
+	ret = register_trace_net_dev_xmit(probe_net_dev_xmit);
+	WARN_ON(ret);
+	ret = register_trace_net_dev_receive(probe_net_dev_receive);
+	WARN_ON(ret);
+	ret = register_trace_net_del_ifa_ipv4(probe_net_del_ifa_ipv4);
+	WARN_ON(ret);
+	ret = register_trace_net_insert_ifa_ipv4(probe_net_insert_ifa_ipv4);
+	WARN_ON(ret);
+	ret = register_trace_net_socket_sendmsg(probe_net_socket_sendmsg);
+	WARN_ON(ret);
+	ret = register_trace_net_socket_recvmsg(probe_net_socket_recvmsg);
+	WARN_ON(ret);
+	ret = register_trace_net_socket_create(probe_net_socket_create);
+	WARN_ON(ret);
+	ret = register_trace_net_socket_call(probe_net_socket_call);
+	WARN_ON(ret);
+
+	return 0;
+}
+
+module_init(net_trace_init);
+
+void __exit net_trace_exit(void)
+{
+	unregister_trace_net_socket_call(probe_net_socket_call);
+	unregister_trace_net_socket_create(probe_net_socket_create);
+	unregister_trace_net_socket_recvmsg(probe_net_socket_recvmsg);
+	unregister_trace_net_socket_sendmsg(probe_net_socket_sendmsg);
+	unregister_trace_net_insert_ifa_ipv4(probe_net_insert_ifa_ipv4);
+	unregister_trace_net_del_ifa_ipv4(probe_net_del_ifa_ipv4);
+	unregister_trace_net_dev_receive(probe_net_dev_receive);
+	unregister_trace_net_dev_xmit(probe_net_dev_xmit);
+}
+
+module_exit(net_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Net Tracepoint Probes");

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC patch 05/12] LTTng instrumentation mm
  2008-07-04 23:52 ` [RFC patch 05/12] LTTng instrumentation mm Mathieu Desnoyers
@ 2008-07-05  9:42   ` KOSAKI Motohiro
  2008-07-07 20:38     ` Mathieu Desnoyers
  0 siblings, 1 reply; 24+ messages in thread
From: KOSAKI Motohiro @ 2008-07-05  9:42 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: kosaki.motohiro, akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt, linux-mm, Dave Hansen,
	Hideo AOKI, Takashi Nishiie, Masami Hiramatsu

> Memory management core events.
> 
> Added tracepoints :
> 
> mm_filemap_wait_end
> mm_filemap_wait_start
> mm_handle_fault_entry
> mm_handle_fault_exit
> mm_huge_page_alloc
> mm_huge_page_free
> mm_page_alloc
> mm_page_free
> mm_swap_file_close
> mm_swap_file_open
> mm_swap_in
> mm_swap_out

Mathieu, this patch is too large and contains multiple changes.
The memory subsystem has many features and is developed by many people.

So, nobody can ack it.
Could you split it into smaller patches?

Also, the patch description is very poor.

I guess

> mm_filemap_wait_end
> mm_filemap_wait_start
	for latency statistics of lock_page delays

	if so, we should also know who is holding the lock.


> mm_handle_fault_entry
> mm_handle_fault_exit
	??
	please explain.

> mm_page_alloc
> mm_page_free
	for memory leak tracking
	for sorting out memory eaters
	etc.

> mm_huge_page_alloc
> mm_huge_page_free
	ditto
	(but huge pages are developed by different people than the normal page
	 allocator, so a separate patch would be better)

> mm_swap_file_close
> mm_swap_file_open
	??
	What usage do you have in mind?

> mm_swap_in
> mm_swap_out
	for swap usage statistics
	for swap delay accounting


Also, some tracepoints are placed in performance-critical functions.
So, you should include performance results in the patch description.


> Index: linux-2.6-lttng/mm/filemap.c
> ===================================================================
> --- linux-2.6-lttng.orig/mm/filemap.c	2008-07-04 18:26:02.000000000 -0400
> +++ linux-2.6-lttng/mm/filemap.c	2008-07-04 18:26:37.000000000 -0400
> @@ -33,6 +33,7 @@
>  #include <linux/cpuset.h>
>  #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
>  #include <linux/memcontrol.h>
> +#include "mm-trace.h"
>  #include "internal.h"
>  
>  /*
> @@ -540,9 +541,11 @@ void wait_on_page_bit(struct page *page,
>  {
>  	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
>  
> +	trace_mm_filemap_wait_start(page, bit_nr);
>  	if (test_bit(bit_nr, &page->flags))
>  		__wait_on_bit(page_waitqueue(page), &wait, sync_page,
>  							TASK_UNINTERRUPTIBLE);
> +	trace_mm_filemap_wait_end(page, bit_nr);
>  }
>  EXPORT_SYMBOL(wait_on_page_bit);

looks good to me.


>  
> Index: linux-2.6-lttng/mm/memory.c
> ===================================================================
> --- linux-2.6-lttng.orig/mm/memory.c	2008-07-04 18:26:02.000000000 -0400
> +++ linux-2.6-lttng/mm/memory.c	2008-07-04 18:26:37.000000000 -0400
> @@ -51,6 +51,7 @@
>  #include <linux/init.h>
>  #include <linux/writeback.h>
>  #include <linux/memcontrol.h>
> +#include "mm-trace.h"
>  
>  #include <asm/pgalloc.h>
>  #include <asm/uaccess.h>
> @@ -2201,6 +2202,7 @@ static int do_swap_page(struct mm_struct
>  		/* Had to read the page from swap area: Major fault */
>  		ret = VM_FAULT_MAJOR;
>  		count_vm_event(PGMAJFAULT);
> +		trace_mm_swap_in(page, entry);
>  	}
>  
>  	if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {

Somebody will want swapin delay statistics.
(see delayacct_set_flag() and delayacct_clear_flag())

If the page is in the swap cache, swapin finishes very quickly;
otherwise, it takes a very long time.


> +	trace_mm_handle_fault_entry(address, write_access);
> +
>  	__set_current_state(TASK_RUNNING);
>  
>  	count_vm_event(PGFAULT);

Wouldn't passing mm or vma be better?
Otherwise, the address is ambiguous.

> -	if (unlikely(is_vm_hugetlb_page(vma)))
> -		return hugetlb_fault(mm, vma, address, write_access);
> +	if (unlikely(is_vm_hugetlb_page(vma))) {
> +		res = hugetlb_fault(mm, vma, address, write_access);
> +		goto end;
> +	}
>  
>  	pgd = pgd_offset(mm, address);
>  	pud = pud_alloc(mm, pgd, address);
> -	if (!pud)
> -		return VM_FAULT_OOM;
> +	if (!pud) {
> +		res = VM_FAULT_OOM;
> +		goto end;
> +	}
>  	pmd = pmd_alloc(mm, pud, address);
> -	if (!pmd)
> -		return VM_FAULT_OOM;
> +	if (!pmd) {
> +		res = VM_FAULT_OOM;
> +		goto end;
> +	}
>  	pte = pte_alloc_map(mm, pmd, address);
> -	if (!pte)
> -		return VM_FAULT_OOM;
> +	if (!pte) {
> +		res = VM_FAULT_OOM;
> +		goto end;
> +	}
>  
> -	return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
> +	res = handle_pte_fault(mm, vma, address, pte, pmd, write_access);
> +end:
> +	trace_mm_handle_fault_exit();
> +	return res;
>  }

No arguments?
If two page faults happen in parallel, how do you tell the two faults apart?

Also, IMHO the res variable is very important,
because it is OOM related.
Much MM troubleshooting work deals with OOM-related problems.

>  #ifndef __PAGETABLE_PUD_FOLDED
> Index: linux-2.6-lttng/mm/page_alloc.c
> ===================================================================
> --- linux-2.6-lttng.orig/mm/page_alloc.c	2008-07-04 18:26:02.000000000 -0400
> +++ linux-2.6-lttng/mm/page_alloc.c	2008-07-04 18:26:37.000000000 -0400
> @@ -46,6 +46,7 @@
>  #include <linux/page-isolation.h>
>  #include <linux/memcontrol.h>
>  #include <linux/debugobjects.h>
> +#include "mm-trace.h"
>  
>  #include <asm/tlbflush.h>
>  #include <asm/div64.h>
> @@ -510,6 +511,8 @@ static void __free_pages_ok(struct page 
>  	int i;
>  	int reserved = 0;
>  
> +	trace_mm_page_free(page, order);
> +
>  	for (i = 0 ; i < (1 << order) ; ++i)
>  		reserved += free_pages_check(page + i);
>  	if (reserved)
> @@ -966,6 +969,8 @@ static void free_hot_cold_page(struct pa
>  	struct per_cpu_pages *pcp;
>  	unsigned long flags;
>  
> +	trace_mm_page_free(page, 0);
> +
>  	if (PageAnon(page))
>  		page->mapping = NULL;
>  	if (free_pages_check(page))
> @@ -1630,6 +1635,7 @@ nopage:
>  		show_mem();
>  	}
>  got_pg:
> +	trace_mm_page_alloc(page, order);
>  	return page;
>  }
>  

Please pass the current task.
I guess somebody will need memory allocation tracking.



> Index: linux-2.6-lttng/mm/page_io.c
> ===================================================================
> --- linux-2.6-lttng.orig/mm/page_io.c	2008-07-04 18:26:02.000000000 -0400
> +++ linux-2.6-lttng/mm/page_io.c	2008-07-04 18:26:37.000000000 -0400
> @@ -17,6 +17,7 @@
>  #include <linux/bio.h>
>  #include <linux/swapops.h>
>  #include <linux/writeback.h>
> +#include "mm-trace.h"
>  #include <asm/pgtable.h>
>  
>  static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
> @@ -114,6 +115,7 @@ int swap_writepage(struct page *page, st
>  		rw |= (1 << BIO_RW_SYNC);
>  	count_vm_event(PSWPOUT);
>  	set_page_writeback(page);
> +	trace_mm_swap_out(page);
>  	unlock_page(page);
>  	submit_bio(rw, bio);
>  out:

This tracepoint probes the start of swapout, right?
So why don't you probe the end of swapout too?



> Index: linux-2.6-lttng/mm/hugetlb.c
> ===================================================================
> --- linux-2.6-lttng.orig/mm/hugetlb.c	2008-07-04 18:26:02.000000000 -0400
> +++ linux-2.6-lttng/mm/hugetlb.c	2008-07-04 18:26:37.000000000 -0400
> @@ -14,6 +14,7 @@
>  #include <linux/mempolicy.h>
>  #include <linux/cpuset.h>
>  #include <linux/mutex.h>
> +#include "mm-trace.h"
>  
>  #include <asm/page.h>
>  #include <asm/pgtable.h>
> @@ -141,6 +142,7 @@ static void free_huge_page(struct page *
>  	int nid = page_to_nid(page);
>  	struct address_space *mapping;
>  
> +	trace_mm_huge_page_free(page);
>  	mapping = (struct address_space *) page_private(page);
>  	set_page_private(page, 0);
>  	BUG_ON(page_count(page));
> @@ -509,6 +511,7 @@ static struct page *alloc_huge_page(stru
>  	if (!IS_ERR(page)) {
>  		set_page_refcounted(page);
>  		set_page_private(page, (unsigned long) mapping);
> +		trace_mm_huge_page_alloc(page);
>  	}
>  	return page;
>  }

This tracepoint probes changes to HugePages_Free, right?
Why don't you probe changes to HugePages_Total and HugePages_Rsvd as well?


> Index: linux-2.6-lttng/mm/swapfile.c
> ===================================================================
> --- linux-2.6-lttng.orig/mm/swapfile.c	2008-07-04 18:26:02.000000000 -0400
> +++ linux-2.6-lttng/mm/swapfile.c	2008-07-04 18:26:37.000000000 -0400
> @@ -32,6 +32,7 @@
>  #include <asm/pgtable.h>
>  #include <asm/tlbflush.h>
>  #include <linux/swapops.h>
> +#include "mm-trace.h"
>  
>  DEFINE_SPINLOCK(swap_lock);
>  unsigned int nr_swapfiles;

> @@ -1310,6 +1311,7 @@ asmlinkage long sys_swapoff(const char _
>  	swap_map = p->swap_map;
>  	p->swap_map = NULL;
>  	p->flags = 0;
> +	trace_mm_swap_file_close(swap_file);
>  	spin_unlock(&swap_lock);
>  	mutex_unlock(&swapon_mutex);
>  	vfree(swap_map);

Why did you choose this point?
And why don't you pass the pathname? (you pass it in sys_swapon())

IMHO try_to_unuse() causes a lot of memory activity, takes a long time and
often triggers the oom-killer.

I think somebody will need a log of that point.


> @@ -1695,6 +1697,7 @@ asmlinkage long sys_swapon(const char __
>  	} else {
>  		swap_info[prev].next = p - swap_info;
>  	}
> +	trace_mm_swap_file_open(swap_file, name);
>  	spin_unlock(&swap_lock);
>  	mutex_unlock(&swapon_mutex);
>  	error = 0;




^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC patch 00/12] Tracepoints v2
  2008-07-04 23:52 [RFC patch 00/12] Tracepoints v2 Mathieu Desnoyers
                   ` (11 preceding siblings ...)
  2008-07-04 23:52 ` [RFC patch 12/12] LTTng instrumentation net " Mathieu Desnoyers
@ 2008-07-05 23:27 ` Eduard - Gabriel Munteanu
  2008-07-07 13:43   ` Mathieu Desnoyers
  12 siblings, 1 reply; 24+ messages in thread
From: Eduard - Gabriel Munteanu @ 2008-07-05 23:27 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt

On Fri, 04 Jul 2008 19:52:07 -0400
Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote:

> Hi,
> 
> Here is the second release of kernel tracepoints, including the
> architecture independent instrumentation taken from LTTng. I submit
> this for another round of comments.

Hi,

Not using format specifiers sounds pretty nice. Are there any other
advantages over markers? What about self-modifying code, does it employ
this technique to eliminate conditional branching (markers did this on
some arches, IIRC)?

I'm considering using this for kmemtrace, so please keep me Cc-ed if
possible.


	Cheers,
	Eduard

P.S.: BTW, the To header is either broken or I screwed it (I'm reading
this via Gmane's NNTP):
"Steven Rostedt <ros"

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC patch 00/12] Tracepoints v2
  2008-07-05 23:27 ` [RFC patch 00/12] Tracepoints v2 Eduard - Gabriel Munteanu
@ 2008-07-07 13:43   ` Mathieu Desnoyers
  0 siblings, 0 replies; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-07 13:43 UTC (permalink / raw)
  To: Eduard - Gabriel Munteanu
  Cc: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt

* Eduard - Gabriel Munteanu (eduard.munteanu@linux360.ro) wrote:
> On Fri, 04 Jul 2008 19:52:07 -0400
> Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote:
> 
> > Hi,
> > 
> > Here is the second release of kernel tracepoints, including the
> > architecture independent instrumentation taken from LTTng. I submit
> > this for another round of comments.
> 
> Hi,
> 
> Not using format specifiers sounds pretty nice. Are there any other
> advantages over markers? What about self-modifying code, does it employ
> this technique to eliminate conditional branching (markers did this on
> some arches, IIRC)?
> 

The main advantage is to remove the format strings. It also allows the
tracepoints to be declared in a header file instead of only spreading them
all over the kernel code. It aims at providing an internal kernel
interface to kernel tracers, compared to markers which are eventually
meant to export the trace information, along with the format specifiers,
to userspace. The version I sent here does not use immediate values
(self-modifying code), but the one I have in my patchset does.

> I'm considering using this for kmemtrace, so please keep me Cc-ed if
> possible.
> 

Sure, I will,

Mathieu

> 
> 	Cheers,
> 	Eduard
> 
> P.S.: BTW, the To header is either broken or I screwed it (I'm reading
> this via Gmane's NNTP):
> "Steven Rostedt <ros"

Ok, will fix in the next post. Thanks.

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC patch 01/12] Kernel Tracepoints
  2008-07-04 23:52 ` [RFC patch 01/12] Kernel Tracepoints Mathieu Desnoyers
@ 2008-07-07 16:27   ` Masami Hiramatsu
  2008-07-08 20:37     ` Masami Hiramatsu
  0 siblings, 1 reply; 24+ messages in thread
From: Masami Hiramatsu @ 2008-07-07 16:27 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt, Hideo AOKI, Takashi Nishiie,
	Alexander Viro

Mathieu Desnoyers wrote:
> Implementation of kernel tracepoints. Inspired from the Linux Kernel Markers.
> 
> Allows complete typing verification. No format string required.
> 
> TODO : Documentation/tracepoint.txt
> 
> Changelog :
> - Use #name ":" #proto as string to identify the tracepoint in the
>   tracepoint table. This will make sure not type mismatch happens due to
>   connexion of a probe with the wrong type to a tracepoint declared with
>   the same name in a different header.
> 
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> CC: 'Peter Zijlstra' <peterz@infradead.org>
> CC: "Frank Ch. Eigler" <fche@redhat.com>
> CC: 'Ingo Molnar' <mingo@elte.hu>
> CC: 'Hideo AOKI' <haoki@redhat.com>
> CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
> CC: 'Steven Rostedt' <rostedt@goodmis.org>
> CC: Alexander Viro <viro@zeniv.linux.org.uk>

Thanks, this looks good to me. Tested on x86-64.
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>

> ---
>  include/asm-generic/vmlinux.lds.h |    6 
>  include/linux/module.h            |   17 +
>  include/linux/tracepoint.h        |  123 +++++++++
>  init/Kconfig                      |    7 
>  kernel/Makefile                   |    1 
>  kernel/module.c                   |   66 +++++
>  kernel/tracepoint.c               |  474 ++++++++++++++++++++++++++++++++++++++
>  7 files changed, 692 insertions(+), 2 deletions(-)
> 
> Index: linux-2.6-lttng/init/Kconfig
> ===================================================================
> --- linux-2.6-lttng.orig/init/Kconfig	2008-07-04 10:48:08.000000000 -0400
> +++ linux-2.6-lttng/init/Kconfig	2008-07-04 11:12:22.000000000 -0400
> @@ -782,6 +782,13 @@ config PROFILING
>  	  Say Y here to enable the extended profiling support mechanisms used
>  	  by profilers such as OProfile.
>  
> +config TRACEPOINTS
> +	bool "Activate tracepoints"
> +	default y
> +	help
> +	  Place an empty function call at each tracepoint site. Can be
> +	  dynamically changed for a probe function.
> +
>  config MARKERS
>  	bool "Activate markers"
>  	help
> Index: linux-2.6-lttng/kernel/Makefile
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/Makefile	2008-07-04 10:48:08.000000000 -0400
> +++ linux-2.6-lttng/kernel/Makefile	2008-07-04 11:10:41.000000000 -0400
> @@ -68,6 +68,7 @@ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
>  obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
>  obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
>  obj-$(CONFIG_MARKERS) += marker.o
> +obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
>  obj-$(CONFIG_LATENCYTOP) += latencytop.o
>  
>  ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
> Index: linux-2.6-lttng/include/linux/tracepoint.h
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6-lttng/include/linux/tracepoint.h	2008-07-04 11:10:39.000000000 -0400
> @@ -0,0 +1,123 @@
> +#ifndef _LINUX_TRACEPOINT_H
> +#define _LINUX_TRACEPOINT_H
> +
> +/*
> + * Kernel Tracepoint API.
> + *
> + * See Documentation/tracepoint.txt.
> + *
> + * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> + *
> + * Heavily inspired from the Linux Kernel Markers.
> + *
> + * This file is released under the GPLv2.
> + * See the file COPYING for more details.
> + */
> +
> +#include <linux/types.h>
> +
> +struct module;
> +struct tracepoint;
> +
> +struct tracepoint {
> +	const char *name;		/* Tracepoint name */
> +	int state;			/* State. */
> +	void **funcs;
> +} __attribute__((aligned(8)));
> +
> +
> +#define TPPROTO(args...)	args
> +#define TPARGS(args...)		args
> +
> +#ifdef CONFIG_TRACEPOINTS
> +
> +#define __DO_TRACE(tp, proto, args)					\
> +	do {								\
> +		int i;							\
> +		void **funcs;						\
> +		preempt_disable();					\
> +		funcs = (tp)->funcs;					\
> +		smp_read_barrier_depends();				\
> +		if (funcs) {						\
> +			for (i = 0; funcs[i]; i++) {			\
> +				((void(*)(proto))(funcs[i]))(args);	\
> +			}						\
> +		}							\
> +		preempt_enable();					\
> +	} while (0)
> +
> +/*
> + * Make sure the alignment of the structure in the __tracepoints section will
> + * not add unwanted padding between the beginning of the section and the
> + * structure. Force alignment to the same alignment as the section start.
> + */
> +#define DEFINE_TRACE(name, proto, args)					\
> +	static inline void trace_##name(proto)				\
> +	{								\
> +		static const char __tpstrtab_##name[]			\
> +		__attribute__((section("__tracepoints_strings")))	\
> +		= #name ":" #proto;					\
> +		static struct tracepoint __tracepoint_##name		\
> +		__attribute__((section("__tracepoints"), aligned(8))) =	\
> +		{ __tpstrtab_##name, 0, NULL };				\
> +		if (unlikely(__tracepoint_##name.state))		\
> +			__DO_TRACE(&__tracepoint_##name,		\
> +				TPPROTO(proto), TPARGS(args));		\
> +	}								\
> +	static inline int register_trace_##name(void (*probe)(proto))	\
> +	{								\
> +		return tracepoint_probe_register(#name ":" #proto,	\
> +			(void *)probe);					\
> +	}								\
> +	static inline void unregister_trace_##name(void (*probe)(proto))\
> +	{								\
> +		tracepoint_probe_unregister(#name ":" #proto,		\
> +			(void *)probe);					\
> +	}
> +
> +extern void tracepoint_update_probe_range(struct tracepoint *begin,
> +	struct tracepoint *end);
> +
> +#else /* !CONFIG_TRACEPOINTS */
> +#define DEFINE_TRACE(name, proto, args)			\
> +	static inline void _do_trace_##name(struct tracepoint *tp, proto) \
> +	{ }								\
> +	static inline void trace_##name(proto)				\
> +	{ }								\
> +	static inline int register_trace_##name(void (*probe)(proto))	\
> +	{								\
> +		return -ENOSYS;						\
> +	}								\
> +	static inline void unregister_trace_##name(void (*probe)(proto))\
> +	{ }
> +
> +static inline void tracepoint_update_probe_range(struct tracepoint *begin,
> +	struct tracepoint *end)
> +{ }
> +#endif /* CONFIG_TRACEPOINTS */
> +
> +/*
> + * Connect a probe to a tracepoint.
> + * Internal API, should not be used directly.
> + */
> +extern int tracepoint_probe_register(const char *name, void *probe);
> +
> +/*
> + * Disconnect a probe from a tracepoint.
> + * Internal API, should not be used directly.
> + */
> +extern int tracepoint_probe_unregister(const char *name, void *probe);
> +
> +struct tracepoint_iter {
> +	struct module *module;
> +	struct tracepoint *tracepoint;
> +};
> +
> +extern void tracepoint_iter_start(struct tracepoint_iter *iter);
> +extern void tracepoint_iter_next(struct tracepoint_iter *iter);
> +extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
> +extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
> +extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
> +	struct tracepoint *begin, struct tracepoint *end);
> +
> +#endif
> Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
> ===================================================================
> --- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h	2008-07-04 10:48:08.000000000 -0400
> +++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h	2008-07-04 11:10:41.000000000 -0400
> @@ -52,7 +52,10 @@
>  	. = ALIGN(8);							\
>  	VMLINUX_SYMBOL(__start___markers) = .;				\
>  	*(__markers)							\
> -	VMLINUX_SYMBOL(__stop___markers) = .;
> +	VMLINUX_SYMBOL(__stop___markers) = .;				\
> +	VMLINUX_SYMBOL(__start___tracepoints) = .;			\
> +	*(__tracepoints)						\
> +	VMLINUX_SYMBOL(__stop___tracepoints) = .;
>  
>  #define RO_DATA(align)							\
>  	. = ALIGN((align));						\
> @@ -61,6 +64,7 @@
>  		*(.rodata) *(.rodata.*)					\
>  		*(__vermagic)		/* Kernel version magic */	\
>  		*(__markers_strings)	/* Markers: strings */		\
> +		*(__tracepoints_strings)/* Tracepoints: strings */	\
>  	}								\
>  									\
>  	.rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {		\
> Index: linux-2.6-lttng/kernel/tracepoint.c
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6-lttng/kernel/tracepoint.c	2008-07-04 11:10:39.000000000 -0400
> @@ -0,0 +1,474 @@
> +/*
> + * Copyright (C) 2008 Mathieu Desnoyers
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> + */
> +#include <linux/module.h>
> +#include <linux/mutex.h>
> +#include <linux/types.h>
> +#include <linux/jhash.h>
> +#include <linux/list.h>
> +#include <linux/rcupdate.h>
> +#include <linux/tracepoint.h>
> +#include <linux/err.h>
> +#include <linux/slab.h>
> +
> +extern struct tracepoint __start___tracepoints[];
> +extern struct tracepoint __stop___tracepoints[];
> +
> +/* Set to 1 to enable tracepoint debug output */
> +static const int tracepoint_debug;
> +
> +/*
> + * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
> + * builtin and module tracepoints and the hash table.
> + */
> +static DEFINE_MUTEX(tracepoints_mutex);
> +
> +/*
> + * Tracepoint hash table, containing the active tracepoints.
> + * Protected by tracepoints_mutex.
> + */
> +#define TRACEPOINT_HASH_BITS 6
> +#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
> +
> +/*
> + * Note about RCU :
> + * It is used to to delay the free of multiple probes array until a quiescent
> + * state is reached.
> + * Tracepoint entries modifications are protected by the tracepoints_mutex.
> + */
> +struct tracepoint_entry {
> +	struct hlist_node hlist;
> +	void **funcs;
> +	int refcount;	/* Number of times armed. 0 if disarmed. */
> +	struct rcu_head rcu;
> +	void *oldptr;
> +	unsigned char rcu_pending:1;
> +	char name[0];
> +};
> +
> +static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
> +
> +static void free_old_closure(struct rcu_head *head)
> +{
> +	struct tracepoint_entry *entry = container_of(head,
> +		struct tracepoint_entry, rcu);
> +	kfree(entry->oldptr);
> +	/* Make sure we free the data before setting the pending flag to 0 */
> +	smp_wmb();
> +	entry->rcu_pending = 0;
> +}
> +
> +static void debug_print_probes(struct tracepoint_entry *entry)
> +{
> +	int i;
> +
> +	if (!tracepoint_debug)
> +		return;
> +
> +	for (i = 0; entry->funcs[i]; i++)
> +		printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
> +}
> +
> +static void *
> +tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
> +{
> +	int nr_probes = 0;
> +	void **old, **new;
> +
> +	WARN_ON(!probe);
> +
> +	debug_print_probes(entry);
> +	old = entry->funcs;
> +	if (old) {
> +		/* (N -> N+1), (N != 0, 1) probes */
> +		for (nr_probes = 0; old[nr_probes]; nr_probes++)
> +			if (old[nr_probes] == probe)
> +				return ERR_PTR(-EBUSY);
> +	}
> +	/* + 2 : one for new probe, one for NULL func */
> +	new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
> +	if (new == NULL)
> +		return ERR_PTR(-ENOMEM);
> +	if (old)
> +		memcpy(new, old, nr_probes * sizeof(void *));
> +	new[nr_probes] = probe;
> +	entry->refcount = nr_probes + 1;
> +	entry->funcs = new;
> +	debug_print_probes(entry);
> +	return old;
> +}
> +
> +static void *
> +tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
> +{
> +	int nr_probes = 0, nr_del = 0, i;
> +	void **old, **new;
> +
> +	old = entry->funcs;
> +
> +	debug_print_probes(entry);
> +	/* (N -> M), (N > 1, M >= 0) probes */
> +	for (nr_probes = 0; old[nr_probes]; nr_probes++) {
> +		if ((!probe || old[nr_probes] == probe))
> +			nr_del++;
> +	}
> +
> +	if (nr_probes - nr_del == 0) {
> +		/* N -> 0, (N > 1) */
> +		entry->funcs = NULL;
> +		entry->refcount = 0;
> +		debug_print_probes(entry);
> +		return old;
> +	} else {
> +		int j = 0;
> +		/* N -> M, (N > 1, M > 0) */
> +		/* + 1 for NULL */
> +		new = kzalloc((nr_probes - nr_del + 1)
> +			* sizeof(void *), GFP_KERNEL);
> +		if (new == NULL)
> +			return ERR_PTR(-ENOMEM);
> +		for (i = 0; old[i]; i++)
> +			if ((probe && old[i] != probe))
> +				new[j++] = old[i];
> +		entry->refcount = nr_probes - nr_del;
> +		entry->funcs = new;
> +	}
> +	debug_print_probes(entry);
> +	return old;
> +}
> +
> +/*
> + * Get tracepoint if the tracepoint is present in the tracepoint hash table.
> + * Must be called with tracepoints_mutex held.
> + * Returns NULL if not present.
> + */
> +static struct tracepoint_entry *get_tracepoint(const char *name)
> +{
> +	struct hlist_head *head;
> +	struct hlist_node *node;
> +	struct tracepoint_entry *e;
> +	u32 hash = jhash(name, strlen(name), 0);
> +
> +	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
> +	hlist_for_each_entry(e, node, head, hlist) {
> +		if (!strcmp(name, e->name))
> +			return e;
> +	}
> +	return NULL;
> +}
> +
> +/*
> + * Add the tracepoint to the tracepoint hash table. Must be called with
> + * tracepoints_mutex held.
> + */
> +static struct tracepoint_entry *add_tracepoint(const char *name)
> +{
> +	struct hlist_head *head;
> +	struct hlist_node *node;
> +	struct tracepoint_entry *e;
> +	size_t name_len = strlen(name) + 1;
> +	u32 hash = jhash(name, name_len-1, 0);
> +
> +	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
> +	hlist_for_each_entry(e, node, head, hlist) {
> +		if (!strcmp(name, e->name)) {
> +			printk(KERN_NOTICE
> +				"tracepoint %s busy\n", name);
> +			return ERR_PTR(-EBUSY);	/* Already there */
> +		}
> +	}
> +	/*
> +	 * Using kmalloc here to allocate a variable length element. Could
> +	 * cause some memory fragmentation if overused.
> +	 */
> +	e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
> +	if (!e)
> +		return ERR_PTR(-ENOMEM);
> +	memcpy(&e->name[0], name, name_len);
> +	e->funcs = NULL;
> +	e->refcount = 0;
> +	e->rcu_pending = 0;
> +	hlist_add_head(&e->hlist, head);
> +	return e;
> +}
> +
> +/*
> + * Remove the tracepoint from the tracepoint hash table. Must be called with
> + * mutex_lock held.
> + */
> +static int remove_tracepoint(const char *name)
> +{
> +	struct hlist_head *head;
> +	struct hlist_node *node;
> +	struct tracepoint_entry *e;
> +	int found = 0;
> +	size_t len = strlen(name) + 1;
> +	u32 hash = jhash(name, len-1, 0);
> +
> +	head = &tracepoint_table[hash & ((1 << TRACEPOINT_HASH_BITS)-1)];
> +	hlist_for_each_entry(e, node, head, hlist) {
> +		if (!strcmp(name, e->name)) {
> +			found = 1;
> +			break;
> +		}
> +	}
> +	if (!found)
> +		return -ENOENT;
> +	if (e->refcount)
> +		return -EBUSY;
> +	hlist_del(&e->hlist);
> +	/* Make sure the call_rcu has been executed */
> +	if (e->rcu_pending)
> +		rcu_barrier();
> +	kfree(e);
> +	return 0;
> +}
> +
> +/*
> + * Sets the probe callback corresponding to one tracepoint.
> + */
> +static void set_tracepoint(struct tracepoint_entry **entry,
> +	struct tracepoint *elem, int active)
> +{
> +	WARN_ON(strcmp((*entry)->name, elem->name) != 0);
> +
> +	smp_wmb();
> +	/*
> +	 * We also make sure that the new probe callbacks array is consistent
> +	 * before setting a pointer to it.
> +	 */
> +	rcu_assign_pointer(elem->funcs, (*entry)->funcs);
> +	elem->state = active;
> +}
> +
> +/*
> + * Disable a tracepoint and its probe callback.
> + * Note: only waiting an RCU period after setting elem->call to the empty
> + * function insures that the original callback is not used anymore. This insured
> + * by preempt_disable around the call site.
> + */
> +static void disable_tracepoint(struct tracepoint *elem)
> +{
> +	elem->state = 0;
> +}
> +
> +/**
> + * tracepoint_update_probe_range - Update a probe range
> + * @begin: beginning of the range
> + * @end: end of the range
> + *
> + * Updates the probe callback corresponding to a range of tracepoints.
> + */
> +void tracepoint_update_probe_range(struct tracepoint *begin,
> +	struct tracepoint *end)
> +{
> +	struct tracepoint *iter;
> +	struct tracepoint_entry *mark_entry;
> +
> +	mutex_lock(&tracepoints_mutex);
> +	for (iter = begin; iter < end; iter++) {
> +		mark_entry = get_tracepoint(iter->name);
> +		if (mark_entry) {
> +			set_tracepoint(&mark_entry, iter,
> +					!!mark_entry->refcount);
> +		} else {
> +			disable_tracepoint(iter);
> +		}
> +	}
> +	mutex_unlock(&tracepoints_mutex);
> +}
> +
> +/*
> + * Update probes, removing the faulty probes.
> + */
> +static void tracepoint_update_probes(void)
> +{
> +	/* Core kernel tracepoints */
> +	tracepoint_update_probe_range(__start___tracepoints,
> +		__stop___tracepoints);
> +	/* tracepoints in modules. */
> +	module_update_tracepoints();
> +}
> +
> +/**
> + * tracepoint_probe_register -  Connect a probe to a tracepoint
> + * @name: tracepoint name
> + * @probe: probe handler
> + *
> + * Returns 0 if ok, error value on error.
> + * The probe address must at least be aligned on the architecture pointer size.
> + */
> +int tracepoint_probe_register(const char *name, void *probe)
> +{
> +	struct tracepoint_entry *entry;
> +	int ret = 0;
> +	void *old;
> +
> +	mutex_lock(&tracepoints_mutex);
> +	entry = get_tracepoint(name);
> +	if (!entry) {
> +		entry = add_tracepoint(name);
> +		if (IS_ERR(entry)) {
> +			ret = PTR_ERR(entry);
> +			goto end;
> +		}
> +	}
> +	/*
> +	 * If we detect that a call_rcu is pending for this tracepoint,
> +	 * make sure it's executed now.
> +	 */
> +	if (entry->rcu_pending)
> +		rcu_barrier();
> +	old = tracepoint_entry_add_probe(entry, probe);
> +	if (IS_ERR(old)) {
> +		ret = PTR_ERR(old);
> +		goto end;
> +	}
> +	mutex_unlock(&tracepoints_mutex);
> +	tracepoint_update_probes();		/* may update entry */
> +	mutex_lock(&tracepoints_mutex);
> +	entry = get_tracepoint(name);
> +	WARN_ON(!entry);
> +	entry->oldptr = old;
> +	entry->rcu_pending = 1;
> +	/* write rcu_pending before calling the RCU callback */
> +	smp_wmb();
> +#ifdef CONFIG_PREEMPT_RCU
> +	synchronize_sched();	/* Until we have the call_rcu_sched() */
> +#endif
> +	call_rcu(&entry->rcu, free_old_closure);
> +end:
> +	mutex_unlock(&tracepoints_mutex);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(tracepoint_probe_register);
> +
> +/**
> + * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
> + * @name: tracepoint name
> + * @probe: probe function pointer
> + *
> + * We do not need to call a synchronize_sched to make sure the probes have
> + * finished running before doing a module unload, because the module unload
> + * itself uses stop_machine(), which insures that every preempt disabled section
> + * have finished.
> + */
> +int tracepoint_probe_unregister(const char *name, void *probe)
> +{
> +	struct tracepoint_entry *entry;
> +	void *old;
> +	int ret = -ENOENT;
> +
> +	mutex_lock(&tracepoints_mutex);
> +	entry = get_tracepoint(name);
> +	if (!entry)
> +		goto end;
> +	if (entry->rcu_pending)
> +		rcu_barrier();
> +	old = tracepoint_entry_remove_probe(entry, probe);
> +	mutex_unlock(&tracepoints_mutex);
> +	tracepoint_update_probes();		/* may update entry */
> +	mutex_lock(&tracepoints_mutex);
> +	entry = get_tracepoint(name);
> +	if (!entry)
> +		goto end;
> +	entry->oldptr = old;
> +	entry->rcu_pending = 1;
> +	/* write rcu_pending before calling the RCU callback */
> +	smp_wmb();
> +#ifdef CONFIG_PREEMPT_RCU
> +	synchronize_sched();	/* Until we have the call_rcu_sched() */
> +#endif
> +	call_rcu(&entry->rcu, free_old_closure);
> +	remove_tracepoint(name);	/* Ignore busy error message */
> +	ret = 0;
> +end:
> +	mutex_unlock(&tracepoints_mutex);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
> +
> +/**
> + * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
> + * @tracepoint: current tracepoints (in), next tracepoint (out)
> + * @begin: beginning of the range
> + * @end: end of the range
> + *
> + * Returns whether a next tracepoint has been found (1) or not (0).
> + * Will return the first tracepoint in the range if the input tracepoint is
> + * NULL.
> + */
> +int tracepoint_get_iter_range(struct tracepoint **tracepoint,
> +	struct tracepoint *begin, struct tracepoint *end)
> +{
> +	if (!*tracepoint && begin != end) {
> +		*tracepoint = begin;
> +		return 1;
> +	}
> +	if (*tracepoint >= begin && *tracepoint < end)
> +		return 1;
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
> +
> +static void tracepoint_get_iter(struct tracepoint_iter *iter)
> +{
> +	int found = 0;
> +
> +	/* Core kernel tracepoints */
> +	if (!iter->module) {
> +		found = tracepoint_get_iter_range(&iter->tracepoint,
> +				__start___tracepoints, __stop___tracepoints);
> +		if (found)
> +			goto end;
> +	}
> +	/* tracepoints in modules. */
> +	found = module_get_iter_tracepoints(iter);
> +end:
> +	if (!found)
> +		tracepoint_iter_reset(iter);
> +}
> +
> +void tracepoint_iter_start(struct tracepoint_iter *iter)
> +{
> +	tracepoint_get_iter(iter);
> +}
> +EXPORT_SYMBOL_GPL(tracepoint_iter_start);
> +
> +void tracepoint_iter_next(struct tracepoint_iter *iter)
> +{
> +	iter->tracepoint++;
> +	/*
> +	 * iter->tracepoint may be invalid because we blindly incremented it.
> +	 * Make sure it is valid by marshalling on the tracepoints, getting the
> +	 * tracepoints from following modules if necessary.
> +	 */
> +	tracepoint_get_iter(iter);
> +}
> +EXPORT_SYMBOL_GPL(tracepoint_iter_next);
> +
> +void tracepoint_iter_stop(struct tracepoint_iter *iter)
> +{
> +}
> +EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
> +
> +void tracepoint_iter_reset(struct tracepoint_iter *iter)
> +{
> +	iter->module = NULL;
> +	iter->tracepoint = NULL;
> +}
> +EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
> Index: linux-2.6-lttng/kernel/module.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/module.c	2008-07-04 10:48:08.000000000 -0400
> +++ linux-2.6-lttng/kernel/module.c	2008-07-04 11:12:39.000000000 -0400
> @@ -46,6 +46,7 @@
>  #include <asm/cacheflush.h>
>  #include <linux/license.h>
>  #include <asm/sections.h>
> +#include <linux/tracepoint.h>
>  
>  #if 0
>  #define DEBUGP printk
> @@ -1770,6 +1771,8 @@ static struct module *load_module(void _
>  	unsigned int unusedgplcrcindex;
>  	unsigned int markersindex;
>  	unsigned int markersstringsindex;
> +	unsigned int tracepointsindex;
> +	unsigned int tracepointsstringsindex;
>  	struct module *mod;
>  	long err = 0;
>  	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
> @@ -2049,6 +2052,9 @@ static struct module *load_module(void _
>  	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
>   	markersstringsindex = find_sec(hdr, sechdrs, secstrings,
>  					"__markers_strings");
> +	tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
> +	tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
> +					"__tracepoints_strings");
>  
>  	/* Now do relocations. */
>  	for (i = 1; i < hdr->e_shnum; i++) {
> @@ -2076,6 +2082,12 @@ static struct module *load_module(void _
>  	mod->num_markers =
>  		sechdrs[markersindex].sh_size / sizeof(*mod->markers);
>  #endif
> +#ifdef CONFIG_TRACEPOINTS
> +	mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
> +	mod->num_tracepoints =
> +		sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
> +#endif
> +
>  
>          /* Find duplicate symbols */
>  	err = verify_export_symbols(mod);
> @@ -2094,11 +2106,16 @@ static struct module *load_module(void _
>  
>  	add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
>  
> +	if (!mod->taints) {
>  #ifdef CONFIG_MARKERS
> -	if (!mod->taints)
>  		marker_update_probe_range(mod->markers,
>  			mod->markers + mod->num_markers);
>  #endif
> +#ifdef CONFIG_TRACEPOINTS
> +		tracepoint_update_probe_range(mod->tracepoints,
> +			mod->tracepoints + mod->num_tracepoints);
> +#endif
> +	}
>  	err = module_finalize(hdr, sechdrs, mod);
>  	if (err < 0)
>  		goto cleanup;
> @@ -2646,3 +2663,50 @@ void module_update_markers(void)
>  	mutex_unlock(&module_mutex);
>  }
>  #endif
> +
> +#ifdef CONFIG_TRACEPOINTS
> +void module_update_tracepoints(void)
> +{
> +	struct module *mod;
> +
> +	mutex_lock(&module_mutex);
> +	list_for_each_entry(mod, &modules, list)
> +		if (!mod->taints)
> +			tracepoint_update_probe_range(mod->tracepoints,
> +				mod->tracepoints + mod->num_tracepoints);
> +	mutex_unlock(&module_mutex);
> +}
> +
> +/*
> + * Returns 0 if current not found.
> + * Returns 1 if current found.
> + */
> +int module_get_iter_tracepoints(struct tracepoint_iter *iter)
> +{
> +	struct module *iter_mod;
> +	int found = 0;
> +
> +	mutex_lock(&module_mutex);
> +	list_for_each_entry(iter_mod, &modules, list) {
> +		if (!iter_mod->taints) {
> +			/*
> +			 * Sorted module list
> +			 */
> +			if (iter_mod < iter->module)
> +				continue;
> +			else if (iter_mod > iter->module)
> +				iter->tracepoint = NULL;
> +			found = tracepoint_get_iter_range(&iter->tracepoint,
> +				iter_mod->tracepoints,
> +				iter_mod->tracepoints
> +					+ iter_mod->num_tracepoints);
> +			if (found) {
> +				iter->module = iter_mod;
> +				break;
> +			}
> +		}
> +	}
> +	mutex_unlock(&module_mutex);
> +	return found;
> +}
> +#endif
> Index: linux-2.6-lttng/include/linux/module.h
> ===================================================================
> --- linux-2.6-lttng.orig/include/linux/module.h	2008-07-04 10:48:08.000000000 -0400
> +++ linux-2.6-lttng/include/linux/module.h	2008-07-04 11:10:41.000000000 -0400
> @@ -16,6 +16,7 @@
>  #include <linux/kobject.h>
>  #include <linux/moduleparam.h>
>  #include <linux/marker.h>
> +#include <linux/tracepoint.h>
>  #include <asm/local.h>
>  
>  #include <asm/module.h>
> @@ -342,6 +343,10 @@ struct module
>  	struct marker *markers;
>  	unsigned int num_markers;
>  #endif
> +#ifdef CONFIG_TRACEPOINTS
> +	struct tracepoint *tracepoints;
> +	unsigned int num_tracepoints;
> +#endif
>  };
>  #ifndef MODULE_ARCH_INIT
>  #define MODULE_ARCH_INIT {}
> @@ -450,6 +455,9 @@ extern void print_modules(void);
>  
>  extern void module_update_markers(void);
>  
> +extern void module_update_tracepoints(void);
> +extern int module_get_iter_tracepoints(struct tracepoint_iter *iter);
> +
>  #else /* !CONFIG_MODULES... */
>  #define EXPORT_SYMBOL(sym)
>  #define EXPORT_SYMBOL_GPL(sym)
> @@ -554,6 +562,15 @@ static inline void module_update_markers
>  {
>  }
>  
> +static inline void module_update_tracepoints(void)
> +{
> +}
> +
> +static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
> +{
> +	return 0;
> +}
> +
>  #endif /* CONFIG_MODULES */
>  
>  struct device_driver;
> 

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America) Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC patch 07/12] Traceprobes
  2008-07-04 23:52 ` [RFC patch 07/12] Traceprobes Mathieu Desnoyers
@ 2008-07-07 16:28   ` Masami Hiramatsu
  0 siblings, 0 replies; 24+ messages in thread
From: Masami Hiramatsu @ 2008-07-07 16:28 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt, Alexander Viro, Hideo AOKI,
	Takashi Nishiie

Mathieu Desnoyers wrote:
> Menu option to activate tracing probes.
> 
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> CC: Alexander Viro <viro@zeniv.linux.org.uk>
> CC: 'Peter Zijlstra' <peterz@infradead.org>
> CC: "Frank Ch. Eigler" <fche@redhat.com>
> CC: 'Ingo Molnar' <mingo@elte.hu>
> CC: 'Hideo AOKI' <haoki@redhat.com>
> CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
> CC: 'Steven Rostedt' <rostedt@goodmis.org>
> CC: Masami Hiramatsu <mhiramat@redhat.com>

Thanks, this looks good to me. Tested on x86-64.
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>

> ---
>  init/Kconfig |    9 +++++++++
>  1 file changed, 9 insertions(+)
> 
> Index: linux-2.6-lttng/init/Kconfig
> ===================================================================
> --- linux-2.6-lttng.orig/init/Kconfig	2008-07-04 09:32:52.000000000 -0400
> +++ linux-2.6-lttng/init/Kconfig	2008-07-04 09:33:05.000000000 -0400
> @@ -795,6 +795,15 @@ config MARKERS
>  	  Place an empty function call at each marker site. Can be
>  	  dynamically changed for a probe function.
>  
> +config TRACEPROBES
> +	tristate "Compile generic tracing probes"
> +	depends on MARKERS
> +	default y
> +	help
> +	  Compile generic tracing probes, which connect to the tracepoints when
> +	  loaded and format the information collected by the tracepoints with
> +	  the Markers.
> +
>  source "arch/Kconfig"
>  
>  config PROC_PAGE_MONITOR
> 

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America) Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC patch 04/12] LTTng instrumentation kernel
  2008-07-04 23:52 ` [RFC patch 04/12] LTTng instrumentation kernel Mathieu Desnoyers
@ 2008-07-07 16:36   ` Masami Hiramatsu
  0 siblings, 0 replies; 24+ messages in thread
From: Masami Hiramatsu @ 2008-07-07 16:36 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt, Hideo AOKI, Takashi Nishiie

Mathieu Desnoyers wrote:
> Core kernel events.
> 
> *not* present in this patch because they are architecture specific :
> - syscall entry/exit
> - traps
> - kernel thread creation
> 
> Added markers :
> 
> kernel_irq_entry
> kernel_irq_exit
> kernel_kthread_stop
> kernel_kthread_stop_ret
> kernel_module_free
> kernel_module_load
> kernel_printk
> kernel_process_exit
> kernel_process_fork
> kernel_process_free
> kernel_process_wait
> kernel_sched_migrate_task
> kernel_sched_schedule
> kernel_sched_try_wakeup
> kernel_sched_wait_task
> kernel_sched_wakeup_new_task
> kernel_send_signal
> kernel_softirq_entry
> kernel_softirq_exit
> kernel_softirq_raise
> kernel_tasklet_high_entry
> kernel_tasklet_high_exit
> kernel_tasklet_low_entry
> kernel_tasklet_low_exit
> kernel_timer_itimer_expired
> kernel_timer_itimer_set
> kernel_timer_set
> kernel_timer_timeout
> kernel_timer_update_time
> kernel_vprintk

Hi Mathieu,
I think this patch covers too many subsystems for anyone to give you an ack.
I know these essential events are very important for tracers,
but it is hard for each subsystem developer to ack it.

I think you need to split it into a series of patches, for example,

irq tracepoints:
> kernel_irq_entry
> kernel_irq_exit
> kernel_softirq_entry
> kernel_softirq_exit
> kernel_softirq_raise
> kernel_tasklet_high_entry
> kernel_tasklet_high_exit
> kernel_tasklet_low_entry
> kernel_tasklet_low_exit

scheduler tracepoints:
> kernel_kthread_stop
> kernel_kthread_stop_ret
> kernel_sched_migrate_task
> kernel_sched_schedule
> kernel_sched_try_wakeup
> kernel_sched_wait_task
> kernel_sched_wakeup_new_task
> kernel_process_exit
> kernel_process_fork
> kernel_process_free
> kernel_process_wait
> kernel_send_signal

timer tracepoints:
> kernel_timer_itimer_expired
> kernel_timer_itimer_set
> kernel_timer_set
> kernel_timer_timeout
> kernel_timer_update_time

misc tracepoints:
> kernel_module_free
> kernel_module_load
> kernel_printk
> kernel_vprintk

On the other hand, if you do that, you'd better use DEFINE_TRACE()
in each subsystem's header, instead of kernel-trace.h.

Thank you,

> 
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> CC: 'Peter Zijlstra' <peterz@infradead.org>
> CC: "Frank Ch. Eigler" <fche@redhat.com>
> CC: 'Ingo Molnar' <mingo@elte.hu>
> CC: 'Hideo AOKI' <haoki@redhat.com>
> CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com>
> CC: 'Steven Rostedt' <rostedt@goodmis.org>
> CC: Masami Hiramatsu <mhiramat@redhat.com>
> ---
>  kernel/exit.c         |    6 ++
>  kernel/fork.c         |    3 +
>  kernel/irq/handle.c   |    6 ++
>  kernel/itimer.c       |    5 ++
>  kernel/kernel-trace.h |  106 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  kernel/kthread.c      |    5 ++
>  kernel/module.c       |    5 ++
>  kernel/printk.c       |    5 ++
>  kernel/sched.c        |    6 ++
>  kernel/signal.c       |    3 +
>  kernel/softirq.c      |    8 +++
>  kernel/timer.c        |    8 +++
>  12 files changed, 165 insertions(+), 1 deletion(-)
> 
> Index: linux-2.6-lttng/kernel/irq/handle.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/irq/handle.c	2008-07-04 16:59:29.000000000 -0400
> +++ linux-2.6-lttng/kernel/irq/handle.c	2008-07-04 17:38:16.000000000 -0400
> @@ -15,6 +15,7 @@
>  #include <linux/random.h>
>  #include <linux/interrupt.h>
>  #include <linux/kernel_stat.h>
> +#include "../kernel-trace.h"
>  
>  #include "internals.h"
>  
> @@ -130,6 +131,9 @@ irqreturn_t handle_IRQ_event(unsigned in
>  {
>  	irqreturn_t ret, retval = IRQ_NONE;
>  	unsigned int status = 0;
> +	struct pt_regs *regs = get_irq_regs();
> +
> +	trace_kernel_irq_entry(irq, regs);
>  
>  	handle_dynamic_tick(action);
>  
> @@ -148,6 +152,8 @@ irqreturn_t handle_IRQ_event(unsigned in
>  		add_interrupt_randomness(irq);
>  	local_irq_disable();
>  
> +	trace_kernel_irq_exit();
> +
>  	return retval;
>  }
>  
> Index: linux-2.6-lttng/kernel/itimer.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/itimer.c	2008-07-04 16:59:29.000000000 -0400
> +++ linux-2.6-lttng/kernel/itimer.c	2008-07-04 17:38:16.000000000 -0400
> @@ -12,6 +12,7 @@
>  #include <linux/time.h>
>  #include <linux/posix-timers.h>
>  #include <linux/hrtimer.h>
> +#include "kernel-trace.h"
>  
>  #include <asm/uaccess.h>
>  
> @@ -132,6 +133,8 @@ enum hrtimer_restart it_real_fn(struct h
>  	struct signal_struct *sig =
>  		container_of(timer, struct signal_struct, real_timer);
>  
> +	trace_kernel_timer_itimer_expired(sig);
> +
>  	kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
>  
>  	return HRTIMER_NORESTART;
> @@ -157,6 +160,8 @@ int do_setitimer(int which, struct itime
>  	    !timeval_valid(&value->it_interval))
>  		return -EINVAL;
>  
> +	trace_kernel_timer_itimer_set(which, value);
> +
>  	switch (which) {
>  	case ITIMER_REAL:
>  again:
> Index: linux-2.6-lttng/kernel/kthread.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/kthread.c	2008-07-04 16:59:29.000000000 -0400
> +++ linux-2.6-lttng/kernel/kthread.c	2008-07-04 17:38:16.000000000 -0400
> @@ -13,6 +13,7 @@
>  #include <linux/file.h>
>  #include <linux/module.h>
>  #include <linux/mutex.h>
> +#include "kernel-trace.h"
>  
>  #define KTHREAD_NICE_LEVEL (-5)
>  
> @@ -205,6 +206,8 @@ int kthread_stop(struct task_struct *k)
>  	/* It could exit after stop_info.k set, but before wake_up_process. */
>  	get_task_struct(k);
>  
> +	trace_kernel_kthread_stop(k);
> +
>  	/* Must init completion *before* thread sees kthread_stop_info.k */
>  	init_completion(&kthread_stop_info.done);
>  	smp_wmb();
> @@ -220,6 +223,8 @@ int kthread_stop(struct task_struct *k)
>  	ret = kthread_stop_info.err;
>  	mutex_unlock(&kthread_stop_lock);
>  
> +	trace_kernel_kthread_stop_ret(ret);
> +
>  	return ret;
>  }
>  EXPORT_SYMBOL(kthread_stop);
> Index: linux-2.6-lttng/kernel/printk.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/printk.c	2008-07-04 16:59:29.000000000 -0400
> +++ linux-2.6-lttng/kernel/printk.c	2008-07-04 17:38:16.000000000 -0400
> @@ -32,6 +32,7 @@
>  #include <linux/security.h>
>  #include <linux/bootmem.h>
>  #include <linux/syscalls.h>
> +#include "kernel-trace.h"
>  
>  #include <asm/uaccess.h>
>  
> @@ -610,6 +611,7 @@ asmlinkage int printk(const char *fmt, .
>  	int r;
>  
>  	va_start(args, fmt);
> +	trace_kernel_printk(__builtin_return_address(0));
>  	r = vprintk(fmt, args);
>  	va_end(args);
>  
> @@ -687,6 +689,9 @@ asmlinkage int vprintk(const char *fmt, 
>  	raw_local_irq_save(flags);
>  	this_cpu = smp_processor_id();
>  
> +	trace_kernel_vprintk(__builtin_return_address(0),
> +		printk_buf, printed_len);
> +
>  	/*
>  	 * Ouch, printk recursed into itself!
>  	 */
> Index: linux-2.6-lttng/kernel/sched.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/sched.c	2008-07-04 16:59:29.000000000 -0400
> +++ linux-2.6-lttng/kernel/sched.c	2008-07-04 17:38:16.000000000 -0400
> @@ -70,6 +70,7 @@
>  #include <linux/bootmem.h>
>  #include <linux/debugfs.h>
>  #include <linux/ctype.h>
> +#include "kernel-trace.h"
>  
>  #include <asm/tlb.h>
>  #include <asm/irq_regs.h>
> @@ -1806,6 +1807,7 @@ void wait_task_inactive(struct task_stru
>  		 * just go back and repeat.
>  		 */
>  		rq = task_rq_lock(p, &flags);
> +		trace_kernel_sched_wait_task(p);
>  		running = task_running(rq, p);
>  		on_rq = p->se.on_rq;
>  		task_rq_unlock(rq, &flags);
> @@ -2087,6 +2089,7 @@ static int try_to_wake_up(struct task_st
>  
>  	smp_wmb();
>  	rq = task_rq_lock(p, &flags);
> +	trace_kernel_sched_try_wakeup(p);
>  	old_state = p->state;
>  	if (!(old_state & state))
>  		goto out;
> @@ -2264,6 +2267,7 @@ void wake_up_new_task(struct task_struct
>  	struct rq *rq;
>  
>  	rq = task_rq_lock(p, &flags);
> +	trace_kernel_sched_wakeup_new_task(p);
>  	BUG_ON(p->state != TASK_RUNNING);
>  	update_rq_clock(rq);
>  
> @@ -2451,6 +2455,7 @@ context_switch(struct rq *rq, struct tas
>  	struct mm_struct *mm, *oldmm;
>  
>  	prepare_task_switch(rq, prev, next);
> +	trace_kernel_sched_schedule(prev, next);
>  	mm = next->mm;
>  	oldmm = prev->active_mm;
>  	/*
> @@ -2683,6 +2688,7 @@ static void sched_migrate_task(struct ta
>  	    || unlikely(cpu_is_offline(dest_cpu)))
>  		goto out;
>  
> +	trace_kernel_sched_migrate_task(p, dest_cpu);
>  	/* force the process onto the specified CPU */
>  	if (migrate_task(p, dest_cpu, &req)) {
>  		/* Need to wait for migration thread (might exit: take ref). */
> Index: linux-2.6-lttng/kernel/signal.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/signal.c	2008-07-04 16:59:29.000000000 -0400
> +++ linux-2.6-lttng/kernel/signal.c	2008-07-04 17:38:16.000000000 -0400
> @@ -26,6 +26,7 @@
>  #include <linux/freezer.h>
>  #include <linux/pid_namespace.h>
>  #include <linux/nsproxy.h>
> +#include "kernel-trace.h"
>  
>  #include <asm/param.h>
>  #include <asm/uaccess.h>
> @@ -807,6 +808,8 @@ static int send_signal(int sig, struct s
>  	struct sigpending *pending;
>  	struct sigqueue *q;
>  
> +	trace_kernel_signal_send(sig, t);
> +
>  	assert_spin_locked(&t->sighand->siglock);
>  	if (!prepare_signal(sig, t))
>  		return 0;
> Index: linux-2.6-lttng/kernel/softirq.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/softirq.c	2008-07-04 16:59:29.000000000 -0400
> +++ linux-2.6-lttng/kernel/softirq.c	2008-07-04 17:38:16.000000000 -0400
> @@ -21,6 +21,7 @@
>  #include <linux/rcupdate.h>
>  #include <linux/smp.h>
>  #include <linux/tick.h>
> +#include "kernel-trace.h"
>  
>  #include <asm/irq.h>
>  /*
> @@ -231,7 +232,9 @@ restart:
>  
>  	do {
>  		if (pending & 1) {
> +			trace_kernel_softirq_entry(h, softirq_vec);
>  			h->action(h);
> +			trace_kernel_softirq_exit(h, softirq_vec);
>  			rcu_bh_qsctr_inc(cpu);
>  		}
>  		h++;
> @@ -323,6 +326,7 @@ void irq_exit(void)
>   */
>  inline void raise_softirq_irqoff(unsigned int nr)
>  {
> +	trace_kernel_softirq_raise(nr);
>  	__raise_softirq_irqoff(nr);
>  
>  	/*
> @@ -412,7 +416,9 @@ static void tasklet_action(struct softir
>  			if (!atomic_read(&t->count)) {
>  				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
>  					BUG();
> +				trace_kernel_tasklet_low_entry(t);
>  				t->func(t->data);
> +				trace_kernel_tasklet_low_exit(t);
>  				tasklet_unlock(t);
>  				continue;
>  			}
> @@ -447,7 +453,9 @@ static void tasklet_hi_action(struct sof
>  			if (!atomic_read(&t->count)) {
>  				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
>  					BUG();
> +				trace_kernel_tasklet_high_entry(t);
>  				t->func(t->data);
> +				trace_kernel_tasklet_high_exit(t);
>  				tasklet_unlock(t);
>  				continue;
>  			}
> Index: linux-2.6-lttng/kernel/timer.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/timer.c	2008-07-04 16:59:29.000000000 -0400
> +++ linux-2.6-lttng/kernel/timer.c	2008-07-04 17:38:16.000000000 -0400
> @@ -37,12 +37,14 @@
>  #include <linux/delay.h>
>  #include <linux/tick.h>
>  #include <linux/kallsyms.h>
> +#include "kernel-trace.h"
>  
>  #include <asm/uaccess.h>
>  #include <asm/unistd.h>
>  #include <asm/div64.h>
>  #include <asm/timex.h>
>  #include <asm/io.h>
> +#include <asm/irq_regs.h>
>  
>  u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
>  
> @@ -288,6 +290,7 @@ static void internal_add_timer(struct tv
>  		i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
>  		vec = base->tv5.vec + i;
>  	}
> +	trace_kernel_timer_set(timer);
>  	/*
>  	 * Timers are FIFO:
>  	 */
> @@ -1074,6 +1077,7 @@ void do_timer(unsigned long ticks)
>  {
>  	jiffies_64 += ticks;
>  	update_times(ticks);
> +	trace_kernel_timer_update_time();
>  }
>  
>  #ifdef __ARCH_WANT_SYS_ALARM
> @@ -1155,7 +1159,9 @@ asmlinkage long sys_getegid(void)
>  
>  static void process_timeout(unsigned long __data)
>  {
> -	wake_up_process((struct task_struct *)__data);
> +	struct task_struct *task = (struct task_struct *)__data;
> +	trace_kernel_timer_timeout(task);
> +	wake_up_process(task);
>  }
>  
>  /**
> Index: linux-2.6-lttng/kernel/exit.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/exit.c	2008-07-04 16:59:29.000000000 -0400
> +++ linux-2.6-lttng/kernel/exit.c	2008-07-04 17:38:16.000000000 -0400
> @@ -45,6 +45,7 @@
>  #include <linux/resource.h>
>  #include <linux/blkdev.h>
>  #include <linux/task_io_accounting_ops.h>
> +#include "kernel-trace.h"
>  
>  #include <asm/uaccess.h>
>  #include <asm/unistd.h>
> @@ -148,6 +149,7 @@ static void __exit_signal(struct task_st
>  
>  static void delayed_put_task_struct(struct rcu_head *rhp)
>  {
> +	trace_kernel_process_free(container_of(rhp, struct task_struct, rcu));
>  	put_task_struct(container_of(rhp, struct task_struct, rcu));
>  }
>  
> @@ -1042,6 +1044,8 @@ NORET_TYPE void do_exit(long code)
>  
>  	if (group_dead)
>  		acct_process();
> +	trace_kernel_process_exit(tsk);
> +
>  	exit_sem(tsk);
>  	exit_files(tsk);
>  	exit_fs(tsk);
> @@ -1526,6 +1530,8 @@ static long do_wait(enum pid_type type, 
>  	struct task_struct *tsk;
>  	int flag, retval;
>  
> +	trace_kernel_process_wait(pid);
> +
>  	add_wait_queue(&current->signal->wait_chldexit,&wait);
>  repeat:
>  	/* If there is nothing that can match our critier just get out */
> Index: linux-2.6-lttng/kernel/fork.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/fork.c	2008-07-04 16:59:29.000000000 -0400
> +++ linux-2.6-lttng/kernel/fork.c	2008-07-04 17:38:16.000000000 -0400
> @@ -54,6 +54,7 @@
>  #include <linux/tty.h>
>  #include <linux/proc_fs.h>
>  #include <linux/blkdev.h>
> +#include "kernel-trace.h"
>  
>  #include <asm/pgtable.h>
>  #include <asm/pgalloc.h>
> @@ -1356,6 +1357,8 @@ long do_fork(unsigned long clone_flags,
>  	if (!IS_ERR(p)) {
>  		struct completion vfork;
>  
> +		trace_kernel_process_fork(current, p);
> +
>  		nr = task_pid_vnr(p);
>  
>  		if (clone_flags & CLONE_PARENT_SETTID)
> Index: linux-2.6-lttng/kernel/module.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/module.c	2008-07-04 16:59:29.000000000 -0400
> +++ linux-2.6-lttng/kernel/module.c	2008-07-04 17:38:16.000000000 -0400
> @@ -47,6 +47,7 @@
>  #include <linux/license.h>
>  #include <asm/sections.h>
>  #include <linux/tracepoint.h>
> +#include "kernel-trace.h"
>  
>  #if 0
>  #define DEBUGP printk
> @@ -1386,6 +1387,8 @@ static int __unlink_module(void *_mod)
>  /* Free a module, remove from lists, etc (must hold module_mutex). */
>  static void free_module(struct module *mod)
>  {
> +	trace_kernel_module_free(mod);
> +
>  	/* Delete from various lists */
>  	stop_machine_run(__unlink_module, mod, NR_CPUS);
>  	remove_notes_attrs(mod);
> @@ -2176,6 +2179,8 @@ static struct module *load_module(void _
>  	/* Get rid of temporary copy */
>  	vfree(hdr);
>  
> +	trace_kernel_module_load(mod);
> +
>  	/* Done! */
>  	return mod;
>  
> Index: linux-2.6-lttng/kernel/kernel-trace.h
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6-lttng/kernel/kernel-trace.h	2008-07-04 17:38:16.000000000 -0400
> @@ -0,0 +1,106 @@
> +#ifndef _KERNEL_TRACE_H
> +#define _KERNEL_TRACE_H
> +
> +#include <linux/kdebug.h>
> +#include <linux/interrupt.h>
> +#include <linux/tracepoint.h>
> +
> +DEFINE_TRACE(kernel_irq_entry,
> +	TPPROTO(unsigned int id, struct pt_regs *regs),
> +	TPARGS(id, regs));
> +DEFINE_TRACE(kernel_irq_exit,
> +	TPPROTO(void),
> +	TPARGS());
> +DEFINE_TRACE(kernel_timer_itimer_expired,
> +	TPPROTO(struct signal_struct *sig),
> +	TPARGS(sig));
> +DEFINE_TRACE(kernel_timer_itimer_set,
> +	TPPROTO(int which, struct itimerval *value),
> +	TPARGS(which, value));
> +DEFINE_TRACE(kernel_kthread_stop,
> +	TPPROTO(struct task_struct *t),
> +	TPARGS(t));
> +DEFINE_TRACE(kernel_kthread_stop_ret,
> +	TPPROTO(int ret),
> +	TPARGS(ret));
> +DEFINE_TRACE(kernel_printk,
> +	TPPROTO(void *retaddr),
> +	TPARGS(retaddr));
> +DEFINE_TRACE(kernel_vprintk,
> +	TPPROTO(void *retaddr, char *buf, int len),
> +	TPARGS(retaddr, buf, len));
> +
> +/*
> + * Scheduler trace points.
> + */
> +DEFINE_TRACE(kernel_sched_wait_task,
> +	TPPROTO(struct task_struct *p),
> +	TPARGS(p));
> +DEFINE_TRACE(kernel_sched_try_wakeup,
> +	TPPROTO(struct task_struct *p),
> +	TPARGS(p));
> +DEFINE_TRACE(kernel_sched_wakeup_new_task,
> +	TPPROTO(struct task_struct *p),
> +	TPARGS(p));
> +DEFINE_TRACE(kernel_sched_schedule,
> +	TPPROTO(struct task_struct *prev, struct task_struct *next),
> +	TPARGS(prev, next));
> +DEFINE_TRACE(kernel_sched_migrate_task,
> +	TPPROTO(struct task_struct *p, int dest_cpu),
> +	TPARGS(p, dest_cpu));
> +
> +DEFINE_TRACE(kernel_signal_send,
> +	TPPROTO(int sig, struct task_struct *p),
> +	TPARGS(sig, p));
> +DEFINE_TRACE(kernel_softirq_entry,
> +	TPPROTO(struct softirq_action *h, struct softirq_action *softirq_vec),
> +	TPARGS(h, softirq_vec));
> +DEFINE_TRACE(kernel_softirq_exit,
> +	TPPROTO(struct softirq_action *h, struct softirq_action *softirq_vec),
> +	TPARGS(h, softirq_vec));
> +DEFINE_TRACE(kernel_softirq_raise,
> +	TPPROTO(unsigned int nr),
> +	TPARGS(nr));
> +DEFINE_TRACE(kernel_tasklet_low_entry,
> +	TPPROTO(struct tasklet_struct *t),
> +	TPARGS(t));
> +DEFINE_TRACE(kernel_tasklet_low_exit,
> +	TPPROTO(struct tasklet_struct *t),
> +	TPARGS(t));
> +DEFINE_TRACE(kernel_tasklet_high_entry,
> +	TPPROTO(struct tasklet_struct *t),
> +	TPARGS(t));
> +DEFINE_TRACE(kernel_tasklet_high_exit,
> +	TPPROTO(struct tasklet_struct *t),
> +	TPARGS(t));
> +DEFINE_TRACE(kernel_timer_set,
> +	TPPROTO(struct timer_list *timer),
> +	TPARGS(timer));
> +/*
> + * xtime_lock is taken when kernel_timer_update_time tracepoint is reached.
> + */
> +DEFINE_TRACE(kernel_timer_update_time,
> +	TPPROTO(void),
> +	TPARGS());
> +DEFINE_TRACE(kernel_timer_timeout,
> +	TPPROTO(struct task_struct *p),
> +	TPARGS(p));
> +DEFINE_TRACE(kernel_process_free,
> +	TPPROTO(struct task_struct *p),
> +	TPARGS(p));
> +DEFINE_TRACE(kernel_process_exit,
> +	TPPROTO(struct task_struct *p),
> +	TPARGS(p));
> +DEFINE_TRACE(kernel_process_wait,
> +	TPPROTO(struct pid *pid),
> +	TPARGS(pid));
> +DEFINE_TRACE(kernel_process_fork,
> +	TPPROTO(struct task_struct *parent, struct task_struct *child),
> +	TPARGS(parent, child));
> +DEFINE_TRACE(kernel_module_free,
> +	TPPROTO(struct module *mod),
> +	TPARGS(mod));
> +DEFINE_TRACE(kernel_module_load,
> +	TPPROTO(struct module *mod),
> +	TPARGS(mod));
> +#endif
> 

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America) Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC patch 05/12] LTTng instrumentation mm
  2008-07-05  9:42   ` KOSAKI Motohiro
@ 2008-07-07 20:38     ` Mathieu Desnoyers
  2008-07-11  8:36       ` KOSAKI Motohiro
  0 siblings, 1 reply; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-07 20:38 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt, linux-mm, Dave Hansen,
	Hideo AOKI, Takashi Nishiie, Masami Hiramatsu

* KOSAKI Motohiro (kosaki.motohiro@jp.fujitsu.com) wrote:
> > Memory management core events.
> > 
> > Added tracepoints :
> > 
> > mm_filemap_wait_end
> > mm_filemap_wait_start
> > mm_handle_fault_entry
> > mm_handle_fault_exit
> > mm_huge_page_alloc
> > mm_huge_page_free
> > mm_page_alloc
> > mm_page_free
> > mm_swap_file_close
> > mm_swap_file_open
> > mm_swap_in
> > mm_swap_out
> 

Hi Kosaki,

Thanks for this thorough review, please see comments below. Comments
without response will be addressed in the next tracepoint release.

> Mathieu, this patch is too large and have multiple change.
> memory subsystem have some feature and is developed by many people.
> 
> So, nobody can ack it.
> Could you split to more small patch?
> 
> and, this patch description is very poor.
> 
> I guess
> 
> > mm_filemap_wait_end
> > mm_filemap_wait_start
> 	for latency statics by lock_page delay
> 
> 	if so, we should know who have locking.
> 
> 
> > mm_handle_fault_entry
> > mm_handle_fault_exit
> 	??
> 	please explain.
> 
> > mm_page_alloc
> > mm_page_free
> 	for memory leak track
> 	for memory eater sort out
> 	etc..
> 
> > mm_huge_page_alloc
> > mm_huge_page_free
> 	ditto
> 	(but, huge page is developed by another person against normal page alloc
> 	 so, patch separating is better)
> 
> > mm_swap_file_close
> > mm_swap_file_open
> 	??
> 	What do you suppose usage?
> 
> > mm_swap_in
> > mm_swap_out
> 	for swap usage statics
> 	for swap delay accounting
> 
> 
> and, some tracepoint is putted on performance critical function.
> So, you should write performance result in patch description.
> 

Ok, I'll resend a new split version with better descriptions.

> 
> > Index: linux-2.6-lttng/mm/filemap.c
> > ===================================================================
> > --- linux-2.6-lttng.orig/mm/filemap.c	2008-07-04 18:26:02.000000000 -0400
> > +++ linux-2.6-lttng/mm/filemap.c	2008-07-04 18:26:37.000000000 -0400
> > @@ -33,6 +33,7 @@
> >  #include <linux/cpuset.h>
> >  #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
> >  #include <linux/memcontrol.h>
> > +#include "mm-trace.h"
> >  #include "internal.h"
> >  
> >  /*
> > @@ -540,9 +541,11 @@ void wait_on_page_bit(struct page *page,
> >  {
> >  	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
> >  
> > +	trace_mm_filemap_wait_start(page, bit_nr);
> >  	if (test_bit(bit_nr, &page->flags))
> >  		__wait_on_bit(page_waitqueue(page), &wait, sync_page,
> >  							TASK_UNINTERRUPTIBLE);
> > +	trace_mm_filemap_wait_end(page, bit_nr);
> >  }
> >  EXPORT_SYMBOL(wait_on_page_bit);
> 
> looks good to me.
> 
> 
> >  
> > Index: linux-2.6-lttng/mm/memory.c
> > ===================================================================
> > --- linux-2.6-lttng.orig/mm/memory.c	2008-07-04 18:26:02.000000000 -0400
> > +++ linux-2.6-lttng/mm/memory.c	2008-07-04 18:26:37.000000000 -0400
> > @@ -51,6 +51,7 @@
> >  #include <linux/init.h>
> >  #include <linux/writeback.h>
> >  #include <linux/memcontrol.h>
> > +#include "mm-trace.h"
> >  
> >  #include <asm/pgalloc.h>
> >  #include <asm/uaccess.h>
> > @@ -2201,6 +2202,7 @@ static int do_swap_page(struct mm_struct
> >  		/* Had to read the page from swap area: Major fault */
> >  		ret = VM_FAULT_MAJOR;
> >  		count_vm_event(PGMAJFAULT);
> > +		trace_mm_swap_in(page, entry);
> >  	}
> >  
> >  	if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
> 
> somebody want get swapin delaying statics.
> (see delayacct_set_flag() and delayacct_clear_flag())
> 
> if swap cache exist, swapin can end very faster.
> otherwise, spend very long time.
> 

I am not sure what you are asking for here ? A supplementary parameter
or another trace point ?

> 
> > +	trace_mm_handle_fault_entry(address, write_access);
> > +
> >  	__set_current_state(TASK_RUNNING);
> >  
> >  	count_vm_event(PGFAULT);
> 
> mm or vma passing is better?
> otherwise, adress is ambiguity.
> 

Adding both mm and vma.


> > -	if (unlikely(is_vm_hugetlb_page(vma)))
> > -		return hugetlb_fault(mm, vma, address, write_access);
> > +	if (unlikely(is_vm_hugetlb_page(vma))) {
> > +		res = hugetlb_fault(mm, vma, address, write_access);
> > +		goto end;
> > +	}
> >  
> >  	pgd = pgd_offset(mm, address);
> >  	pud = pud_alloc(mm, pgd, address);
> > -	if (!pud)
> > -		return VM_FAULT_OOM;
> > +	if (!pud) {
> > +		res = VM_FAULT_OOM;
> > +		goto end;
> > +	}
> >  	pmd = pmd_alloc(mm, pud, address);
> > -	if (!pmd)
> > -		return VM_FAULT_OOM;
> > +	if (!pmd) {
> > +		res = VM_FAULT_OOM;
> > +		goto end;
> > +	}
> >  	pte = pte_alloc_map(mm, pmd, address);
> > -	if (!pte)
> > -		return VM_FAULT_OOM;
> > +	if (!pte) {
> > +		res = VM_FAULT_OOM;
> > +		goto end;
> > +	}
> >  
> > -	return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
> > +	res = handle_pte_fault(mm, vma, address, pte, pmd, write_access);
> > +end:
> > +	trace_mm_handle_fault_exit();
> > +	return res;
> >  }
> 
> no argument?
> if two page fault happend in parallel, how do you sort out this two fault?
> 

By using the current thread identifier in the probe. A PF entry on a given
thread must be followed by a matching PF exit for that same thread.
There may be other events interleaved between the two. Multiple nested
page faults shouldn't happen, but *could*. In this case, the outermost PF
goes with the outermost PF end, and the innermost PF goes with the
innermost PF end.

> and, IMHO res variable is very important.
> because it is OOM related.
> many MM trouble shooting is worked for OOM related.
> 

Ok, I'll add "res".

> 
> >  #ifndef __PAGETABLE_PUD_FOLDED
> > Index: linux-2.6-lttng/mm/page_alloc.c
> > ===================================================================
> > --- linux-2.6-lttng.orig/mm/page_alloc.c	2008-07-04 18:26:02.000000000 -0400
> > +++ linux-2.6-lttng/mm/page_alloc.c	2008-07-04 18:26:37.000000000 -0400
> > @@ -46,6 +46,7 @@
> >  #include <linux/page-isolation.h>
> >  #include <linux/memcontrol.h>
> >  #include <linux/debugobjects.h>
> > +#include "mm-trace.h"
> >  
> >  #include <asm/tlbflush.h>
> >  #include <asm/div64.h>
> > @@ -510,6 +511,8 @@ static void __free_pages_ok(struct page 
> >  	int i;
> >  	int reserved = 0;
> >  
> > +	trace_mm_page_free(page, order);
> > +
> >  	for (i = 0 ; i < (1 << order) ; ++i)
> >  		reserved += free_pages_check(page + i);
> >  	if (reserved)
> > @@ -966,6 +969,8 @@ static void free_hot_cold_page(struct pa
> >  	struct per_cpu_pages *pcp;
> >  	unsigned long flags;
> >  
> > +	trace_mm_page_free(page, 0);
> > +
> >  	if (PageAnon(page))
> >  		page->mapping = NULL;
> >  	if (free_pages_check(page))
> > @@ -1630,6 +1635,7 @@ nopage:
> >  		show_mem();
> >  	}
> >  got_pg:
> > +	trace_mm_page_alloc(page, order);
> >  	return page;
> >  }
> >  
> 
> please pass current task.
> I guess somebody need memory allocation tracking.
> 

Hrm.. "current" is available in the probe. Actually, it's available
anywhere in the kernel, do we really want to pass it on the stack ?

> 
> 
> > Index: linux-2.6-lttng/mm/page_io.c
> > ===================================================================
> > --- linux-2.6-lttng.orig/mm/page_io.c	2008-07-04 18:26:02.000000000 -0400
> > +++ linux-2.6-lttng/mm/page_io.c	2008-07-04 18:26:37.000000000 -0400
> > @@ -17,6 +17,7 @@
> >  #include <linux/bio.h>
> >  #include <linux/swapops.h>
> >  #include <linux/writeback.h>
> > +#include "mm-trace.h"
> >  #include <asm/pgtable.h>
> >  
> >  static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
> > @@ -114,6 +115,7 @@ int swap_writepage(struct page *page, st
> >  		rw |= (1 << BIO_RW_SYNC);
> >  	count_vm_event(PSWPOUT);
> >  	set_page_writeback(page);
> > +	trace_mm_swap_out(page);
> >  	unlock_page(page);
> >  	submit_bio(rw, bio);
> >  out:
> 
> this tracepoint probe swapout starting, right.
> So, Why you don't probe swapout end?
> 

Does submit_bio() block in this case or is it done asynchronously ? It's
of no use to trace swap out "end" when in fact there would be no
blocking involved.


> 
> 
> > Index: linux-2.6-lttng/mm/hugetlb.c
> > ===================================================================
> > --- linux-2.6-lttng.orig/mm/hugetlb.c	2008-07-04 18:26:02.000000000 -0400
> > +++ linux-2.6-lttng/mm/hugetlb.c	2008-07-04 18:26:37.000000000 -0400
> > @@ -14,6 +14,7 @@
> >  #include <linux/mempolicy.h>
> >  #include <linux/cpuset.h>
> >  #include <linux/mutex.h>
> > +#include "mm-trace.h"
> >  
> >  #include <asm/page.h>
> >  #include <asm/pgtable.h>
> > @@ -141,6 +142,7 @@ static void free_huge_page(struct page *
> >  	int nid = page_to_nid(page);
> >  	struct address_space *mapping;
> >  
> > +	trace_mm_huge_page_free(page);
> >  	mapping = (struct address_space *) page_private(page);
> >  	set_page_private(page, 0);
> >  	BUG_ON(page_count(page));
> > @@ -509,6 +511,7 @@ static struct page *alloc_huge_page(stru
> >  	if (!IS_ERR(page)) {
> >  		set_page_refcounted(page);
> >  		set_page_private(page, (unsigned long) mapping);
> > +		trace_mm_huge_page_alloc(page);
> >  	}
> >  	return page;
> >  }
> 
> this tracepoint probe to HugePages_Free change, right?
> Why you don't probe HugePages_Total and HugePages_Rsvd change?
> 

Adding trace_hugetlb_page_reserve(inode, from, to);
and
trace_hugetlb_page_unreserve(inode, offset, freed);

Do you recommend adding another tracing point to monitor the total
hugepages pool changes ?

> 
> > Index: linux-2.6-lttng/mm/swapfile.c
> > ===================================================================
> > --- linux-2.6-lttng.orig/mm/swapfile.c	2008-07-04 18:26:02.000000000 -0400
> > +++ linux-2.6-lttng/mm/swapfile.c	2008-07-04 18:26:37.000000000 -0400
> > @@ -32,6 +32,7 @@
> >  #include <asm/pgtable.h>
> >  #include <asm/tlbflush.h>
> >  #include <linux/swapops.h>
> > +#include "mm-trace.h"
> >  
> >  DEFINE_SPINLOCK(swap_lock);
> >  unsigned int nr_swapfiles;
> 
> > @@ -1310,6 +1311,7 @@ asmlinkage long sys_swapoff(const char _
> >  	swap_map = p->swap_map;
> >  	p->swap_map = NULL;
> >  	p->flags = 0;
> > +	trace_mm_swap_file_close(swap_file);
> >  	spin_unlock(&swap_lock);
> >  	mutex_unlock(&swapon_mutex);
> >  	vfree(swap_map);
> 
> Why you choose this point?

The idea is to monitor swap files so we can eventually know, from a
trace, which swap files were used during the trace and where they were
located. I also have a "swap file list" tracepoint which extracts all
the swap file mappings which I plan to submit later. I normally execute
it at trace start.

> and why you don't pass pathname? (you pass it in sys_swapon()) 
> 

Since this other tracepoint gives me the mapping between file
descriptor and path name, the pathname becomes unnecessary.

> IMHO try_to_unuse cause many memory activity and spend many time and 
> often cause oom-killer.
> 
> I think this point log is needed by somebody.
> 

Should it be considered as part of swapoff ? If it is the case, then
maybe we should just move the trace_swap_file_close(swap_file); a little
bit earlier so it is logged before the try_to_unuse() call ?

Mathieu

> 
> > @@ -1695,6 +1697,7 @@ asmlinkage long sys_swapon(const char __
> >  	} else {
> >  		swap_info[prev].next = p - swap_info;
> >  	}
> > +	trace_mm_swap_file_open(swap_file, name);
> >  	spin_unlock(&swap_lock);
> >  	mutex_unlock(&swapon_mutex);
> >  	error = 0;
> 
> 
> 

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC patch 01/12] Kernel Tracepoints
  2008-07-07 16:27   ` Masami Hiramatsu
@ 2008-07-08 20:37     ` Masami Hiramatsu
  2008-07-09  3:03       ` Mathieu Desnoyers
  0 siblings, 1 reply; 24+ messages in thread
From: Masami Hiramatsu @ 2008-07-08 20:37 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt, Hideo AOKI, Takashi Nishiie,
	Alexander Viro

Hi Mathieu,

Masami Hiramatsu wrote:
[...]
>> +int tracepoint_probe_register(const char *name, void *probe)
>> +{
>> +	struct tracepoint_entry *entry;
>> +	int ret = 0;
>> +	void *old;
>> +
>> +	mutex_lock(&tracepoints_mutex);
>> +	entry = get_tracepoint(name);
>> +	if (!entry) {
>> +		entry = add_tracepoint(name);
>> +		if (IS_ERR(entry)) {
>> +			ret = PTR_ERR(entry);
>> +			goto end;
>> +		}
>> +	}
>> +	/*
>> +	 * If we detect that a call_rcu is pending for this tracepoint,
>> +	 * make sure it's executed now.
>> +	 */
>> +	if (entry->rcu_pending)
>> +		rcu_barrier();
>> +	old = tracepoint_entry_add_probe(entry, probe);
>> +	if (IS_ERR(old)) {
>> +		ret = PTR_ERR(old);
>> +		goto end;
>> +	}
>> +	mutex_unlock(&tracepoints_mutex);
>> +	tracepoint_update_probes();		/* may update entry */
>> +	mutex_lock(&tracepoints_mutex);
>> +	entry = get_tracepoint(name);
>> +	WARN_ON(!entry);

As I said in another patch, you might have to check
old != NULL here, because tracepoint_entry_add_probe() will
return NULL when you add the first probe to the entry.

>> +	entry->oldptr = old;
>> +	entry->rcu_pending = 1;
>> +	/* write rcu_pending before calling the RCU callback */
>> +	smp_wmb();
>> +#ifdef CONFIG_PREEMPT_RCU
>> +	synchronize_sched();	/* Until we have the call_rcu_sched() */
>> +#endif
>> +	call_rcu(&entry->rcu, free_old_closure);
>> +end:
>> +	mutex_unlock(&tracepoints_mutex);
>> +	return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(tracepoint_probe_register);
>> +
>> +/**
>> + * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
>> + * @name: tracepoint name
>> + * @probe: probe function pointer
>> + *
>> + * We do not need to call a synchronize_sched to make sure the probes have
>> + * finished running before doing a module unload, because the module unload
>> + * itself uses stop_machine(), which insures that every preempt disabled section
>> + * have finished.
>> + */
>> +int tracepoint_probe_unregister(const char *name, void *probe)
>> +{
>> +	struct tracepoint_entry *entry;
>> +	void *old;
>> +	int ret = -ENOENT;
>> +
>> +	mutex_lock(&tracepoints_mutex);
>> +	entry = get_tracepoint(name);
>> +	if (!entry)
>> +		goto end;
>> +	if (entry->rcu_pending)
>> +		rcu_barrier();
>> +	old = tracepoint_entry_remove_probe(entry, probe);
>> +	mutex_unlock(&tracepoints_mutex);
>> +	tracepoint_update_probes();		/* may update entry */
>> +	mutex_lock(&tracepoints_mutex);
>> +	entry = get_tracepoint(name);
>> +	if (!entry)
>> +		goto end;
>> +	entry->oldptr = old;
>> +	entry->rcu_pending = 1;
>> +	/* write rcu_pending before calling the RCU callback */
>> +	smp_wmb();
>> +#ifdef CONFIG_PREEMPT_RCU
>> +	synchronize_sched();	/* Until we have the call_rcu_sched() */
>> +#endif
>> +	call_rcu(&entry->rcu, free_old_closure);
>> +	remove_tracepoint(name);	/* Ignore busy error message */
>> +	ret = 0;
>> +end:
>> +	mutex_unlock(&tracepoints_mutex);
>> +	return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
>> +

On the other hand, tracepoint_entry_remove_probe() doesn't return NULL.
However, I think it might be better to introduce tracepoint_entry_free_old()
and simplify both tracepoint_probe_register() and tracepoint_probe_unregister().

Thank you,
-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America) Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC patch 01/12] Kernel Tracepoints
  2008-07-08 20:37     ` Masami Hiramatsu
@ 2008-07-09  3:03       ` Mathieu Desnoyers
  0 siblings, 0 replies; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-09  3:03 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt, Hideo AOKI, Takashi Nishiie,
	Alexander Viro

* Masami Hiramatsu (mhiramat@redhat.com) wrote:
> Hi Mathieu,
> 
> Masami Hiramatsu wrote:
> [...]
> >> +int tracepoint_probe_register(const char *name, void *probe)
> >> +{
> >> +	struct tracepoint_entry *entry;
> >> +	int ret = 0;
> >> +	void *old;
> >> +
> >> +	mutex_lock(&tracepoints_mutex);
> >> +	entry = get_tracepoint(name);
> >> +	if (!entry) {
> >> +		entry = add_tracepoint(name);
> >> +		if (IS_ERR(entry)) {
> >> +			ret = PTR_ERR(entry);
> >> +			goto end;
> >> +		}
> >> +	}
> >> +	/*
> >> +	 * If we detect that a call_rcu is pending for this tracepoint,
> >> +	 * make sure it's executed now.
> >> +	 */
> >> +	if (entry->rcu_pending)
> >> +		rcu_barrier();
> >> +	old = tracepoint_entry_add_probe(entry, probe);
> >> +	if (IS_ERR(old)) {
> >> +		ret = PTR_ERR(old);
> >> +		goto end;
> >> +	}
> >> +	mutex_unlock(&tracepoints_mutex);
> >> +	tracepoint_update_probes();		/* may update entry */
> >> +	mutex_lock(&tracepoints_mutex);
> >> +	entry = get_tracepoint(name);
> >> +	WARN_ON(!entry);
> 
> As I said in another patch, you might have to check
> old != NULL here, because tracepoint_entry_add_probe() will
> return NULL when you add a first probe to the entry.
> 
> >> +	entry->oldptr = old;
> >> +	entry->rcu_pending = 1;
> >> +	/* write rcu_pending before calling the RCU callback */
> >> +	smp_wmb();
> >> +#ifdef CONFIG_PREEMPT_RCU
> >> +	synchronize_sched();	/* Until we have the call_rcu_sched() */
> >> +#endif
> >> +	call_rcu(&entry->rcu, free_old_closure);
> >> +end:
> >> +	mutex_unlock(&tracepoints_mutex);
> >> +	return ret;
> >> +}
> >> +EXPORT_SYMBOL_GPL(tracepoint_probe_register);
> >> +
> >> +/**
> >> + * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
> >> + * @name: tracepoint name
> >> + * @probe: probe function pointer
> >> + *
> >> + * We do not need to call a synchronize_sched to make sure the probes have
> >> + * finished running before doing a module unload, because the module unload
> >> + * itself uses stop_machine(), which insures that every preempt disabled section
> >> + * have finished.
> >> + */
> >> +int tracepoint_probe_unregister(const char *name, void *probe)
> >> +{
> >> +	struct tracepoint_entry *entry;
> >> +	void *old;
> >> +	int ret = -ENOENT;
> >> +
> >> +	mutex_lock(&tracepoints_mutex);
> >> +	entry = get_tracepoint(name);
> >> +	if (!entry)
> >> +		goto end;
> >> +	if (entry->rcu_pending)
> >> +		rcu_barrier();
> >> +	old = tracepoint_entry_remove_probe(entry, probe);
> >> +	mutex_unlock(&tracepoints_mutex);
> >> +	tracepoint_update_probes();		/* may update entry */
> >> +	mutex_lock(&tracepoints_mutex);
> >> +	entry = get_tracepoint(name);
> >> +	if (!entry)
> >> +		goto end;
> >> +	entry->oldptr = old;
> >> +	entry->rcu_pending = 1;
> >> +	/* write rcu_pending before calling the RCU callback */
> >> +	smp_wmb();
> >> +#ifdef CONFIG_PREEMPT_RCU
> >> +	synchronize_sched();	/* Until we have the call_rcu_sched() */
> >> +#endif
> >> +	call_rcu(&entry->rcu, free_old_closure);
> >> +	remove_tracepoint(name);	/* Ignore busy error message */
> >> +	ret = 0;
> >> +end:
> >> +	mutex_unlock(&tracepoints_mutex);
> >> +	return ret;
> >> +}
> >> +EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
> >> +
> 
> On the other hand, tracepoint_entry_remove_probe() doesn't return NULL,
> however, I think it might be better to introduce tracepoint_entry_free_old()
> and simplify both of tracepoint_probe_register/unregister.
> 

Yes, the cleanup makes sense and removes an unnecessary call to call_rcu
(which in fact simply kfrees a NULL pointer, which is pointless). I'll
integrate this change.

Thanks,

Mathieu

> Thank you,
> -- 
> Masami Hiramatsu
> 
> Software Engineer
> Hitachi Computer Products (America) Inc.
> Software Solutions Division
> 
> e-mail: mhiramat@redhat.com
> 

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC patch 05/12] LTTng instrumentation mm
  2008-07-07 20:38     ` Mathieu Desnoyers
@ 2008-07-11  8:36       ` KOSAKI Motohiro
  2008-07-11 14:17         ` Mathieu Desnoyers
  0 siblings, 1 reply; 24+ messages in thread
From: KOSAKI Motohiro @ 2008-07-11  8:36 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: kosaki.motohiro, akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt, linux-mm, Dave Hansen,
	Hideo AOKI, Takashi Nishiie, Masami Hiramatsu

Hi Mathieu,

Sorry for the late response.
I was on a business trip for a few days.


> Hi Kosaki,
> 
> Thanks for this thorough review, please see comments below. Comments
> without response will be addressed in the next tracepoint release.

thanks.


> > > Index: linux-2.6-lttng/mm/memory.c
> > > ===================================================================
> > > --- linux-2.6-lttng.orig/mm/memory.c	2008-07-04 18:26:02.000000000 -0400
> > > +++ linux-2.6-lttng/mm/memory.c	2008-07-04 18:26:37.000000000 -0400
> > > @@ -51,6 +51,7 @@
> > >  #include <linux/init.h>
> > >  #include <linux/writeback.h>
> > >  #include <linux/memcontrol.h>
> > > +#include "mm-trace.h"
> > >  
> > >  #include <asm/pgalloc.h>
> > >  #include <asm/uaccess.h>
> > > @@ -2201,6 +2202,7 @@ static int do_swap_page(struct mm_struct
> > >  		/* Had to read the page from swap area: Major fault */
> > >  		ret = VM_FAULT_MAJOR;
> > >  		count_vm_event(PGMAJFAULT);
> > > +		trace_mm_swap_in(page, entry);
> > >  	}
> > >  
> > >  	if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
> > 
> > somebody want get swapin delaying statics.
> > (see delayacct_set_flag() and delayacct_clear_flag())
> > 
> > if swap cache exist, swapin can end very faster.
> > otherwise, spend very long time.
> 
> I am not sure what you are asking for here ? A supplementary parameter
> or another trace point ?

Ah, I agree that my explanation was poor.
My intention was "another trace point".



> > > -	if (unlikely(is_vm_hugetlb_page(vma)))
> > > -		return hugetlb_fault(mm, vma, address, write_access);
> > > +	if (unlikely(is_vm_hugetlb_page(vma))) {
> > > +		res = hugetlb_fault(mm, vma, address, write_access);
> > > +		goto end;
> > > +	}
> > >  
> > >  	pgd = pgd_offset(mm, address);
> > >  	pud = pud_alloc(mm, pgd, address);
> > > -	if (!pud)
> > > -		return VM_FAULT_OOM;
> > > +	if (!pud) {
> > > +		res = VM_FAULT_OOM;
> > > +		goto end;
> > > +	}
> > >  	pmd = pmd_alloc(mm, pud, address);
> > > -	if (!pmd)
> > > -		return VM_FAULT_OOM;
> > > +	if (!pmd) {
> > > +		res = VM_FAULT_OOM;
> > > +		goto end;
> > > +	}
> > >  	pte = pte_alloc_map(mm, pmd, address);
> > > -	if (!pte)
> > > -		return VM_FAULT_OOM;
> > > +	if (!pte) {
> > > +		res = VM_FAULT_OOM;
> > > +		goto end;
> > > +	}
> > >  
> > > -	return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
> > > +	res = handle_pte_fault(mm, vma, address, pte, pmd, write_access);
> > > +end:
> > > +	trace_mm_handle_fault_exit();
> > > +	return res;
> > >  }
> > 
> > no argument?
> > if two page fault happend in parallel, how do you sort out this two fault?
> > 
> 
> By using the current thread identifier in the probe. A PF entry on a given
> thread must be followed by a matching PF exit for that same thread.
> There may be other events interleaved between the two. Multiple nested
> page faults shouldn't but *could* happen. In this case, the outermost PF
> goes with the outermost PF end, and the innermost PF goes with the
> innermost PF end.

okey.


> > and, IMHO res variable is very important.
> > because it is OOM related.
> > many MM trouble shooting is worked for OOM related.
> > 
> 
> Ok, I'll add "res".

Thanks.



> > > @@ -510,6 +511,8 @@ static void __free_pages_ok(struct page 
> > >  	int i;
> > >  	int reserved = 0;
> > >  
> > > +	trace_mm_page_free(page, order);
> > > +
> > >  	for (i = 0 ; i < (1 << order) ; ++i)
> > >  		reserved += free_pages_check(page + i);
> > >  	if (reserved)
> > > @@ -966,6 +969,8 @@ static void free_hot_cold_page(struct pa
> > >  	struct per_cpu_pages *pcp;
> > >  	unsigned long flags;
> > >  
> > > +	trace_mm_page_free(page, 0);
> > > +
> > >  	if (PageAnon(page))
> > >  		page->mapping = NULL;
> > >  	if (free_pages_check(page))
> > > @@ -1630,6 +1635,7 @@ nopage:
> > >  		show_mem();
> > >  	}
> > >  got_pg:
> > > +	trace_mm_page_alloc(page, order);
> > >  	return page;
> > >  }
> > >  
> > 
> > please pass current task.
> > I guess somebody need memory allocation tracking.
> > 
> 
> Hrm.. "current" is available in the probe. Actually, it's available
> anywhere in the kernel, do we really want to pass it on the stack ?

you are right.


> > >  static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
> > > @@ -114,6 +115,7 @@ int swap_writepage(struct page *page, st
> > >  		rw |= (1 << BIO_RW_SYNC);
> > >  	count_vm_event(PSWPOUT);
> > >  	set_page_writeback(page);
> > > +	trace_mm_swap_out(page);
> > >  	unlock_page(page);
> > >  	submit_bio(rw, bio);
> > >  out:
> > 
> > this tracepoint probe swapout starting, right.
> > So, Why you don't probe swapout end?
> > 
> 
> Does submit_bio() block in this case or is it done asynchronously ? It's
> of no use to trace swap out "end" when in fact there would be no
> blocking involved.

Umm, OK, I should learn more about LTTng.


> > > @@ -509,6 +511,7 @@ static struct page *alloc_huge_page(stru
> > >  	if (!IS_ERR(page)) {
> > >  		set_page_refcounted(page);
> > >  		set_page_private(page, (unsigned long) mapping);
> > > +		trace_mm_huge_page_alloc(page);
> > >  	}
> > >  	return page;
> > >  }
> > 
> > this tracepoint probe to HugePages_Free change, right?
> > Why you don't probe HugePages_Total and HugePages_Rsvd change?
> 
> Adding trace_hugetlb_page_reserve(inode, from, to);
> and
> trace_hugetlb_page_unreserve(inode, offset, freed);
> 
> Do you recommend adding another tracing point to monitor the total
> hugepages pool changes ?

Yes.
The total number of hugepages can be increased via sysctl.

So it must be logged, just like swap_on/swap_off.
If it is not logged, the meaning of the hugepage free-page count is ambiguous, IMHO.



> > > @@ -1310,6 +1311,7 @@ asmlinkage long sys_swapoff(const char _
> > >  	swap_map = p->swap_map;
> > >  	p->swap_map = NULL;
> > >  	p->flags = 0;
> > > +	trace_mm_swap_file_close(swap_file);
> > >  	spin_unlock(&swap_lock);
> > >  	mutex_unlock(&swapon_mutex);
> > >  	vfree(swap_map);
> > 
> > Why you choose this point?
> 
> The idea is to monitor swap files so we can eventually know, from a
> trace, which tracefiles were used during the trace and where they were
> located. I also have a "swap file list" tracepoint which extracts all
> the tracefile mappings which I plan to submit later. I normally execute
> it at trace start.

Yeah, thank you for the good explanation.


> > and why you don't pass pathname? (you pass it in sys_swapon()) 
> 
> Since this other tracepoint gives me the mapping between file
> descriptor and path name, the pathname becomes unnecessary.

It seems you are saying this is fine as long as the LTTng log analyzer is used,
but I hope the tracepoint mechanism doesn't depend on LTTng.


> > IMHO try_to_unuse cause many memory activity and spend many time and 
> > often cause oom-killer.
> > 
> > I think this point log is needed by somebody.
> 
> Should it be considered as part of swapoff ? 

hmm, okey, you are right.
that is not swapoff.

> If it is the case, then
> maybe should we just move the trace_swap_file_close(swap_file); a little
> be earlier so it is logged before the try_to_unuse() call ?

No.
Eventually, I will add some VM activity tracepoints,
but those can be separate from the swapoff tracepoint.

sorry for my confusion.






^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC patch 05/12] LTTng instrumentation mm
  2008-07-11  8:36       ` KOSAKI Motohiro
@ 2008-07-11 14:17         ` Mathieu Desnoyers
  0 siblings, 0 replies; 24+ messages in thread
From: Mathieu Desnoyers @ 2008-07-11 14:17 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: akpm, Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frank Ch. Eigler, Steven Rostedt, linux-mm, Dave Hansen,
	Hideo AOKI, Takashi Nishiie, Masami Hiramatsu

* KOSAKI Motohiro (kosaki.motohiro@jp.fujitsu.com) wrote:
> Hi Mathieu,
> 
> sorry for late responce.
> I went to business trip few days.
> 
> 
> > Hi Kosaki,
> > 
> > Thanks for this thorough review, please see comments below. Comments
> > without response will be addressed in the next tracepoint release.
> 
> thanks.
> 
> 
> > > > Index: linux-2.6-lttng/mm/memory.c
> > > > ===================================================================
> > > > --- linux-2.6-lttng.orig/mm/memory.c	2008-07-04 18:26:02.000000000 -0400
> > > > +++ linux-2.6-lttng/mm/memory.c	2008-07-04 18:26:37.000000000 -0400
> > > > @@ -51,6 +51,7 @@
> > > >  #include <linux/init.h>
> > > >  #include <linux/writeback.h>
> > > >  #include <linux/memcontrol.h>
> > > > +#include "mm-trace.h"
> > > >  
> > > >  #include <asm/pgalloc.h>
> > > >  #include <asm/uaccess.h>
> > > > @@ -2201,6 +2202,7 @@ static int do_swap_page(struct mm_struct
> > > >  		/* Had to read the page from swap area: Major fault */
> > > >  		ret = VM_FAULT_MAJOR;
> > > >  		count_vm_event(PGMAJFAULT);
> > > > +		trace_mm_swap_in(page, entry);
> > > >  	}
> > > >  
> > > >  	if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
> > > 
> > > somebody want get swapin delaying statics.
> > > (see delayacct_set_flag() and delayacct_clear_flag())
> > > 
> > > if swap cache exist, swapin can end very faster.
> > > otherwise, spend very long time.
> > 
> > I am not sure what you are asking for here ? A supplementary parameter
> > or another trace point ?
> 
> Ah, Agreed with my explain is poor.
> my intension was "another trace point".
> 

I see. You would like to know the duration of the page fault. Actually,
handle_mm_fault instrumentation gives you both the beginning and end of
page faults. Therefore, instrumenting two locations in swap_in would be
redundant.


> 
> 
> > > > -	if (unlikely(is_vm_hugetlb_page(vma)))
> > > > -		return hugetlb_fault(mm, vma, address, write_access);
> > > > +	if (unlikely(is_vm_hugetlb_page(vma))) {
> > > > +		res = hugetlb_fault(mm, vma, address, write_access);
> > > > +		goto end;
> > > > +	}
> > > >  
> > > >  	pgd = pgd_offset(mm, address);
> > > >  	pud = pud_alloc(mm, pgd, address);
> > > > -	if (!pud)
> > > > -		return VM_FAULT_OOM;
> > > > +	if (!pud) {
> > > > +		res = VM_FAULT_OOM;
> > > > +		goto end;
> > > > +	}
> > > >  	pmd = pmd_alloc(mm, pud, address);
> > > > -	if (!pmd)
> > > > -		return VM_FAULT_OOM;
> > > > +	if (!pmd) {
> > > > +		res = VM_FAULT_OOM;
> > > > +		goto end;
> > > > +	}
> > > >  	pte = pte_alloc_map(mm, pmd, address);
> > > > -	if (!pte)
> > > > -		return VM_FAULT_OOM;
> > > > +	if (!pte) {
> > > > +		res = VM_FAULT_OOM;
> > > > +		goto end;
> > > > +	}
> > > >  
> > > > -	return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
> > > > +	res = handle_pte_fault(mm, vma, address, pte, pmd, write_access);
> > > > +end:
> > > > +	trace_mm_handle_fault_exit();
> > > > +	return res;
> > > >  }
> > > 
> > > no argument?
> > > if two page fault happend in parallel, how do you sort out this two fault?
> > > 
> > 
> > By using the current thread identifier in the probe. A PF entry on a given
> > thread must be followed by a matching PF exit for that same thread.
> > There may be other events interleaved between the two. Multiple nested
> > page faults shouldn't but *could* happen. In this case, the outermost PF
> > goes with the outermost PF end, and the innermost PF goes with the
> > innermost PF end.
> 
> okey.
> 
> 
> > > and, IMHO res variable is very important.
> > > because it is OOM related.
> > > many MM trouble shooting is worked for OOM related.
> > > 
> > 
> > Ok, I'll add "res".
> 
> Thanks.
> 
> 
> 
> > > > @@ -510,6 +511,8 @@ static void __free_pages_ok(struct page 
> > > >  	int i;
> > > >  	int reserved = 0;
> > > >  
> > > > +	trace_mm_page_free(page, order);
> > > > +
> > > >  	for (i = 0 ; i < (1 << order) ; ++i)
> > > >  		reserved += free_pages_check(page + i);
> > > >  	if (reserved)
> > > > @@ -966,6 +969,8 @@ static void free_hot_cold_page(struct pa
> > > >  	struct per_cpu_pages *pcp;
> > > >  	unsigned long flags;
> > > >  
> > > > +	trace_mm_page_free(page, 0);
> > > > +
> > > >  	if (PageAnon(page))
> > > >  		page->mapping = NULL;
> > > >  	if (free_pages_check(page))
> > > > @@ -1630,6 +1635,7 @@ nopage:
> > > >  		show_mem();
> > > >  	}
> > > >  got_pg:
> > > > +	trace_mm_page_alloc(page, order);
> > > >  	return page;
> > > >  }
> > > >  
> > > 
> > > please pass current task.
> > > I guess somebody need memory allocation tracking.
> > > 
> > 
> > Hrm.. "current" is available in the probe. Actually, it's available
> > anywhere in the kernel, do we really want to pass it on the stack ?
> 
> you are right.
> 
> 
> > > >  static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
> > > > @@ -114,6 +115,7 @@ int swap_writepage(struct page *page, st
> > > >  		rw |= (1 << BIO_RW_SYNC);
> > > >  	count_vm_event(PSWPOUT);
> > > >  	set_page_writeback(page);
> > > > +	trace_mm_swap_out(page);
> > > >  	unlock_page(page);
> > > >  	submit_bio(rw, bio);
> > > >  out:
> > > 
> > > this tracepoint probe swapout starting, right.
> > > So, Why you don't probe swapout end?
> > > 
> > 
> > Does submit_bio() block in this case or is it done asynchronously ? It's
> > of no use to trace swap out "end" when in fact there would be no
> > blocking involved.
> 
> umm, ok, I should lern LTTng more.
> 
> 
> > > > @@ -509,6 +511,7 @@ static struct page *alloc_huge_page(stru
> > > >  	if (!IS_ERR(page)) {
> > > >  		set_page_refcounted(page);
> > > >  		set_page_private(page, (unsigned long) mapping);
> > > > +		trace_mm_huge_page_alloc(page);
> > > >  	}
> > > >  	return page;
> > > >  }
> > > 
> > > this tracepoint probe to HugePages_Free change, right?
> > > Why you don't probe HugePages_Total and HugePages_Rsvd change?
> > 
> > Adding trace_hugetlb_page_reserve(inode, from, to);
> > and
> > trace_hugetlb_page_unreserve(inode, offset, freed);
> > 
> > Do you recommend adding another tracing point to monitor the total
> > hugepages pool changes ?
> 
> Yes.
> total number of hugepages can increase by sysctl.
> 
> So, it must be logged as swap_on/swap_off.
> if it is not logged, freepages of hugepage meaning is ambiguity, IMHO.
> 

Ok, so I am adding :


static struct page *alloc_huge_page(struct vm_area_struct *vma,
                                    unsigned long addr)
  trace_hugetlb_page_alloc(page);

int hugetlb_reserve_pages(struct inode *inode, long from, long to)
  trace_hugetlb_pages_reserve(inode, from, to, ret);

void hugetlb_unreserve_pages(struct inode *inode, long offset, long
    freed)
  trace_hugetlb_pages_unreserve(inode, offset, freed);

static void update_and_free_page(struct page *page)
  trace_hugetlb_page_release(page);

static void free_huge_page(struct page *page)
  trace_hugetlb_page_free(page);

static struct page *alloc_fresh_huge_page_node(int nid)
  trace_hugetlb_page_grab(page);

static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
                                                unsigned long address)
  trace_hugetlb_buddy_pgalloc(page);


static struct page *alloc_huge_page(struct vm_area_struct *vma,
                                    unsigned long addr)
  trace_hugetlb_page_alloc(page);


It tracks pages taken from the page allocator and from the buddy
allocator, pages released, pages reserved/unreserved and page alloc/free
within hugetlb. Does it seem more appropriate? The only thing it does
not track is "surplus_huge_pages", which seems to be rather internal to
hugetlb. Do you think tracking it would be useful?

> 
> 
> > > > @@ -1310,6 +1311,7 @@ asmlinkage long sys_swapoff(const char _
> > > >  	swap_map = p->swap_map;
> > > >  	p->swap_map = NULL;
> > > >  	p->flags = 0;
> > > > +	trace_mm_swap_file_close(swap_file);
> > > >  	spin_unlock(&swap_lock);
> > > >  	mutex_unlock(&swapon_mutex);
> > > >  	vfree(swap_map);
> > > 
> > > Why you choose this point?
> > 
> > The idea is to monitor swap files so we can eventually know, from a
> > trace, which tracefiles were used during the trace and where they were
> > located. I also have a "swap file list" tracepoint which extracts all
> > the tracefile mappings which I plan to submit later. I normally execute
> > it at trace start.
> 
> yeah, thank you good explain.
> 
> 
> > > and why you don't pass pathname? (you pass it in sys_swapon()) 
> > 
> > Since this other tracepoint gives me the mapping between file
> > descriptor and path name, the pathname becomes unnecessary.
> 
> it seems you said only LTTng log analyzer is cool.
> but I hope tracepoint mechanism doesn't depent on LTTng.
> 

No, the tracepoints are meant to be used by any in-kernel specialized or
module-based generic tracer, which includes ftrace and eventually
blktrace too.

> 
> > > IMHO try_to_unuse cause many memory activity and spend many time and 
> > > often cause oom-killer.
> > > 
> > > I think this point log is needed by somebody.
> > 
> > Should it be considered as part of swapoff ? 
> 
> hmm, okey, you are right.
> that is not swapoff.
> 
> > If it is the case, then
> > maybe should we just move the trace_swap_file_close(swap_file); a little
> > be earlier so it is logged before the try_to_unuse() call ?
> 
> No.
> eventually, I will add to some VM activety tracepoint.
> but that can separate swapoff tracepoint.
> 
> sorry for my confusion.
> 

No problem, thanks for the review!

Mathieu

> 
> 
> 
> 

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2008-07-11 14:17 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-07-04 23:52 [RFC patch 00/12] Tracepoints v2 Mathieu Desnoyers
2008-07-04 23:52 ` [RFC patch 01/12] Kernel Tracepoints Mathieu Desnoyers
2008-07-07 16:27   ` Masami Hiramatsu
2008-07-08 20:37     ` Masami Hiramatsu
2008-07-09  3:03       ` Mathieu Desnoyers
2008-07-04 23:52 ` [RFC patch 02/12] LTTng tracepoint instrumentation fs Mathieu Desnoyers
2008-07-04 23:52 ` [RFC patch 03/12] LTTng instrumentation ipc Mathieu Desnoyers
2008-07-04 23:52 ` [RFC patch 04/12] LTTng instrumentation kernel Mathieu Desnoyers
2008-07-07 16:36   ` Masami Hiramatsu
2008-07-04 23:52 ` [RFC patch 05/12] LTTng instrumentation mm Mathieu Desnoyers
2008-07-05  9:42   ` KOSAKI Motohiro
2008-07-07 20:38     ` Mathieu Desnoyers
2008-07-11  8:36       ` KOSAKI Motohiro
2008-07-11 14:17         ` Mathieu Desnoyers
2008-07-04 23:52 ` [RFC patch 06/12] LTTng instrumentation net Mathieu Desnoyers
2008-07-04 23:52 ` [RFC patch 07/12] Traceprobes Mathieu Desnoyers
2008-07-07 16:28   ` Masami Hiramatsu
2008-07-04 23:52 ` [RFC patch 08/12] LTTng instrumentation FS tracepoint probes Mathieu Desnoyers
2008-07-04 23:52 ` [RFC patch 09/12] LTTng instrumentation ipc " Mathieu Desnoyers
2008-07-04 23:52 ` [RFC patch 10/12] LTTng instrumentation kernel " Mathieu Desnoyers
2008-07-04 23:52 ` [RFC patch 11/12] LTTng instrumentation mm " Mathieu Desnoyers
2008-07-04 23:52 ` [RFC patch 12/12] LTTng instrumentation net " Mathieu Desnoyers
2008-07-05 23:27 ` [RFC patch 00/12] Tracepoints v2 Eduard - Gabriel Munteanu
2008-07-07 13:43   ` Mathieu Desnoyers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).