linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch 0/9] Conditional Calls - for 2.6.22-rc2-mm1
@ 2007-05-30 14:00 Mathieu Desnoyers
  2007-05-30 14:00 ` [patch 1/9] Conditional Calls - Architecture Independent Code Mathieu Desnoyers
                   ` (8 more replies)
  0 siblings, 9 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-30 14:00 UTC (permalink / raw)
  To: akpm, linux-kernel

Hi,

Here is the port of the conditional calls to 2.6.22-rc2-mm1.

Quoting my last post:

Following Andi Kleen's advice, I split the jump-over-call into a separate
piece of infrastructure so it can be used more widely.

It also uses a hash table to permit cond_call enable/disable both when the
cond_call is armed and when a module containing an armed cond_call is loaded.

Please add at the end of the 2.6.22-rc2-mm1 series,

conditional-calls-architecture-independent-code.patch
conditional-calls-hash-table.patch
conditional-calls-non-optimized-architectures.patch
conditional-calls-kconfig-menus.patch
conditional-calls-i386-optimization.patch
conditional-calls-powerpc-optimization.patch
conditional-calls-documentation.patch
#
f00f-bug-use-cond-calls.patch
profiling-use-cond-calls.patch

Mathieu


-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [patch 1/9] Conditional Calls - Architecture Independent Code
  2007-05-30 14:00 [patch 0/9] Conditional Calls - for 2.6.22-rc2-mm1 Mathieu Desnoyers
@ 2007-05-30 14:00 ` Mathieu Desnoyers
  2007-05-30 20:32   ` Andrew Morton
                     ` (2 more replies)
  2007-05-30 14:00 ` [patch 2/9] Conditional Calls - Hash Table Mathieu Desnoyers
                   ` (7 subsequent siblings)
  8 siblings, 3 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-30 14:00 UTC (permalink / raw)
  To: akpm, linux-kernel; +Cc: Mathieu Desnoyers

[-- Attachment #1: conditional-calls-architecture-independent-code.patch --]
[-- Type: text/plain, Size: 14948 bytes --]

Conditional calls are used to compile in code that is meant to be dynamically
enabled at runtime. When code is disabled, it has a very small footprint.

It has a generic cond_call version and optimized per architecture cond_calls.
The optimized cond_call uses a load immediate to remove a data cache hit.

It adds a new rodata section "__cond_call" to place the pointers to the enable
value.

cond_call activation functions sit in module.c.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
 include/asm-generic/vmlinux.lds.h |   16 +-
 include/linux/condcall.h          |   91 +++++++++++++
 include/linux/module.h            |    4 
 kernel/module.c                   |  248 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 356 insertions(+), 3 deletions(-)

Index: linux-2.6-lttng/include/linux/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/linux/condcall.h	2007-05-17 02:13:53.000000000 -0400
@@ -0,0 +1,91 @@
+#ifndef _LINUX_CONDCALL_H
+#define _LINUX_CONDCALL_H
+
+/*
+ * Conditional function calls. Cheap when disabled, enabled at runtime.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#ifdef __KERNEL__
+
+struct __cond_call_struct {
+	const char *name;
+	void *enable;
+	int flags;
+} __attribute__((packed));
+
+
+/* Cond call flags : selects the mechanism used to enable the conditional calls
+ * and prescribe what can be executed within their function. This is primarily
+ * used at reentrancy-unfriendly sites. */
+#define CF_OPTIMIZED		(1 << 0) /* Use optimized cond_call */
+#define CF_LOCKDEP		(1 << 1) /* Can call lockdep */
+#define CF_PRINTK		(1 << 2) /* Probe can call vprintk */
+#define CF_STATIC_ENABLE	(1 << 3) /* Enable cond_call statically */
+#define _CF_NR			4
+
+#ifdef CONFIG_COND_CALL
+
+/* Generic cond_call flavor always available.
+ * Note : the empty asm volatile with read constraint is used here instead of a
+ * "used" attribute to fix a gcc 4.1.x bug. */
+#define cond_call_generic(flags, name, func) \
+	({ \
+		static const char __cstrtab_name_##name[] \
+		__attribute__((section("__cond_call_strings"))) = #name; \
+		static char __cond_call_enable_##name = \
+			(flags) & CF_STATIC_ENABLE; \
+		static const struct __cond_call_struct __cond_call_##name \
+			__attribute__((section("__cond_call"))) = \
+			{ __cstrtab_name_##name, \
+			&__cond_call_enable_##name, \
+			(flags) & ~CF_OPTIMIZED } ; \
+		asm volatile ( "" : : "i" (&__cond_call_##name)); \
+		(unlikely(__cond_call_enable_##name)) ? \
+			(func) : \
+			(__typeof__(func))0; \
+	})
+
+#define COND_CALL_GENERIC_ENABLE_IMMEDIATE_OFFSET 0
+#define COND_CALL_GENERIC_ENABLE_TYPE unsigned char
+/* Dereference enable as lvalue from a pointer to its instruction */
+#define COND_CALL_GENERIC_ENABLE(a) \
+	*(COND_CALL_GENERIC_ENABLE_TYPE*) \
+		((char*)a+COND_CALL_GENERIC_ENABLE_IMMEDIATE_OFFSET)
+
+static inline int cond_call_generic_set_enable(void *address, char enable)
+{
+	COND_CALL_GENERIC_ENABLE(address) = enable;
+	return 0;
+}
+
+#else /* !CONFIG_COND_CALL */
+#define cond_call_generic(flags, name, func) \
+	({ \
+		((flags) & CF_STATIC_ENABLE) ? \
+			(func) : \
+			(__typeof__(func))0; \
+	})
+#endif /* CONFIG_COND_CALL */
+
+#ifdef CONFIG_COND_CALL_ENABLE_OPTIMIZATION
+#include <asm/condcall.h>		/* optimized cond_call flavor */
+#else
+#include <asm-generic/condcall.h>	/* fallback on generic cond_call */
+#endif
+
+#define COND_CALL_MAX_FORMAT_LEN	1024
+
+extern int cond_call_arm(const char *name);
+extern int cond_call_disarm(const char *name);
+
+/* cond_call_query : Returns 1 if enabled, 0 if disabled or not present */
+extern int cond_call_query(const char *name);
+extern int cond_call_list(const char *name);
+
+#endif /* __KERNEL__ */
+#endif
Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h	2007-05-17 02:12:25.000000000 -0400
+++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h	2007-05-17 02:13:42.000000000 -0400
@@ -116,11 +116,22 @@
 		*(__kcrctab_gpl_future)					\
 		VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .;	\
 	}								\
-									\
+	/* Conditional calls: pointers */				\
+	__cond_call : AT(ADDR(__cond_call) - LOAD_OFFSET) {		\
+		VMLINUX_SYMBOL(__start___cond_call) = .;		\
+		*(__cond_call)						\
+		VMLINUX_SYMBOL(__stop___cond_call) = .;			\
+	}								\
 	/* Kernel symbol table: strings */				\
         __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {	\
 		*(__ksymtab_strings)					\
-	}								\
+ 	}								\
+	/* Conditional calls: strings */				\
+        __cond_call_strings : AT(ADDR(__cond_call_strings) - LOAD_OFFSET) { \
+		*(__cond_call_strings)					\
+ 	}								\
+	__end_rodata = .;						\
+	. = ALIGN(4096);						\
 									\
 	/* Built-in module parameters. */				\
 	__param : AT(ADDR(__param) - LOAD_OFFSET) {			\
@@ -228,4 +239,3 @@
   	*(.initcall6s.init)						\
   	*(.initcall7.init)						\
   	*(.initcall7s.init)
-
Index: linux-2.6-lttng/include/linux/module.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/module.h	2007-05-17 02:12:34.000000000 -0400
+++ linux-2.6-lttng/include/linux/module.h	2007-05-17 02:13:41.000000000 -0400
@@ -16,6 +16,7 @@
 #include <linux/stringify.h>
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
+#include <linux/condcall.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -356,6 +357,9 @@
 	/* The command line arguments (may be mangled).  People like
 	   keeping pointers to this stuff */
 	char *args;
+
+	const struct __cond_call_struct *cond_calls;
+	unsigned int num_cond_calls;
 };
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
Index: linux-2.6-lttng/kernel/module.c
===================================================================
--- linux-2.6-lttng.orig/kernel/module.c	2007-05-17 02:12:25.000000000 -0400
+++ linux-2.6-lttng/kernel/module.c	2007-05-17 02:13:53.000000000 -0400
@@ -32,6 +32,7 @@
 #include <linux/cpu.h>
 #include <linux/moduleparam.h>
 #include <linux/errno.h>
+#include <linux/condcall.h>
 #include <linux/err.h>
 #include <linux/vermagic.h>
 #include <linux/notifier.h>
@@ -141,6 +142,8 @@
 extern const unsigned long __start___kcrctab_gpl_future[];
 extern const unsigned long __start___kcrctab_unused[];
 extern const unsigned long __start___kcrctab_unused_gpl[];
+extern const struct __cond_call_struct __start___cond_call[];
+extern const struct __cond_call_struct __stop___cond_call[];
 
 #ifndef CONFIG_MODVERSIONS
 #define symversion(base, idx) NULL
@@ -301,6 +304,230 @@
 	return NULL;
 }
 
+#ifdef CONFIG_COND_CALL
+
+/* Set the enable bit of the cond_call, choosing the generic or architecture
+ * specific functions depending on the cond_call's flags.
+ */
+static int cond_call_set_enable(void *address, char enable, int flags)
+{
+	if (flags & CF_OPTIMIZED)
+		return cond_call_optimized_set_enable(address, enable);
+	else
+		return cond_call_generic_set_enable(address, enable);
+}
+
+/* Query the state of a cond_calls range. */
+static int _cond_call_query_range(const char *name,
+	const struct __cond_call_struct *begin,
+	const struct __cond_call_struct *end)
+{
+	const struct __cond_call_struct *iter;
+	int ret = 0;
+
+	for (iter = begin; iter < end; iter++) {
+		if (strcmp(name, iter->name) != 0)
+			continue;
+		if (iter->flags & CF_OPTIMIZED)
+			ret = COND_CALL_OPTIMIZED_ENABLE(iter->enable);
+		else
+			ret = COND_CALL_GENERIC_ENABLE(iter->enable);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+/* Sets a range of cond_calls to a enabled state : set the enable bit. */
+static int _cond_call_arm_range(const char *name,
+	const struct __cond_call_struct *begin,
+	const struct __cond_call_struct *end)
+{
+	const struct __cond_call_struct *iter;
+	int found = 0;
+
+	for (iter = begin; iter < end; iter++) {
+		if (strcmp(name, iter->name) != 0)
+			continue;
+		cond_call_set_enable(iter->enable, 1, iter->flags);
+		found++;
+	}
+	return found;
+}
+
+/* Sets a range of cond_calls to a disabled state : unset the enable bit. */
+static int _cond_call_disarm_range(const char *name,
+	const struct __cond_call_struct *begin,
+	const struct __cond_call_struct *end)
+{
+	const struct __cond_call_struct *iter;
+	int found = 0;
+
+	for (iter = begin; iter < end; iter++) {
+		if (strcmp(name, iter->name) != 0)
+			continue;
+		cond_call_set_enable(iter->enable, 0, iter->flags);
+		found++;
+	}
+	return found;
+}
+
+/* Provides a listing of the cond_calls present in the kernel with their type
+ * (optimized or generic) and state (enabled or disabled). */
+static int _cond_call_list_range(const char *name,
+	const struct __cond_call_struct *begin,
+	const struct __cond_call_struct *end)
+{
+	const struct __cond_call_struct *iter;
+	int found = 0;
+
+	for (iter = begin; iter < end; iter++) {
+		if (name)
+			if (strcmp(name, iter->name) != 0)
+				continue;
+		printk("name %s \n", iter->name);
+		if (iter->flags & CF_OPTIMIZED)
+			printk("  enable %u optimized\n",
+				COND_CALL_OPTIMIZED_ENABLE(iter->enable));
+		else
+			printk("  enable %u generic\n",
+				COND_CALL_GENERIC_ENABLE(iter->enable));
+		found++;
+	}
+	return found;
+}
+
+/* Calls _cond_call_query_range for the core cond_calls and modules cond_calls.
+ */
+static int _cond_call_query(const char *name)
+{
+	struct module *mod;
+	int ret = 0;
+
+	/* Core kernel cond_calls */
+	ret = _cond_call_query_range(name,
+			__start___cond_call, __stop___cond_call);
+	if (ret)
+		return ret;
+	/* Cond_calls in modules. */
+	list_for_each_entry(mod, &modules, list) {
+		if (mod->taints)
+			continue;
+		ret = _cond_call_query_range(name,
+			mod->cond_calls, mod->cond_calls+mod->num_cond_calls);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+/* Cond_call enabling/disabling use the module_mutex to synchronize.
+ * Returns 1 if enabled, 0 if disabled or not present. */
+int cond_call_query(const char *name)
+{
+	int ret = 0;
+
+	mutex_lock(&module_mutex);
+	ret = _cond_call_query(name);
+	mutex_unlock(&module_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cond_call_query);
+
+/* Calls _cond_call_arm_range for the core cond_calls and modules cond_calls.
+ * FIXME : cond_call will not be active on modules loaded later than the
+ * cond_call_arm. */
+static int _cond_call_arm(const char *name)
+{
+	struct module *mod;
+	int found = 0;
+
+	/* Core kernel cond_calls */
+	found += _cond_call_arm_range(name,
+			__start___cond_call, __stop___cond_call);
+	/* Cond_calls in modules. */
+	list_for_each_entry(mod, &modules, list) {
+		if (mod->taints)
+			continue;
+		found += _cond_call_arm_range(name,
+			mod->cond_calls, mod->cond_calls+mod->num_cond_calls);
+	}
+	return found;
+}
+
+/* Cond_call enabling/disabling use the module_mutex to synchronize. */
+int cond_call_arm(const char *name)
+{
+	int found = 0;
+
+	mutex_lock(&module_mutex);
+	found = _cond_call_arm(name);
+	mutex_unlock(&module_mutex);
+	return found;
+}
+EXPORT_SYMBOL_GPL(cond_call_arm);
+
+/* Calls _cond_call_disarm_range for the core cond_calls and modules cond_calls.
+ */
+static int _cond_call_disarm(const char *name)
+{
+	struct module *mod;
+	int found = 0;
+
+	/* Core kernel cond_calls */
+	found += _cond_call_disarm_range(name,
+			__start___cond_call, __stop___cond_call);
+	/* Cond_calls in modules. */
+	list_for_each_entry(mod, &modules, list) {
+		if (mod->taints)
+			continue;
+		found += _cond_call_disarm_range(name,
+			mod->cond_calls, mod->cond_calls+mod->num_cond_calls);
+	}
+	return found;
+}
+
+/* Cond_call enabling/disabling use the module_mutex to synchronize. */
+int cond_call_disarm(const char *name)
+{
+	int found = 0;
+
+	mutex_lock(&module_mutex);
+	found = _cond_call_disarm(name);
+	mutex_unlock(&module_mutex);
+	return found;
+}
+EXPORT_SYMBOL_GPL(cond_call_disarm);
+
+/* Calls _cond_call_list_range for the core and module cond_calls.
+ * Cond call listing uses the module_mutex to synchronize.
+ * TODO : should output this listing to a procfs file. */
+int cond_call_list(const char *name)
+{
+	struct module *mod;
+	int found = 0;
+
+	mutex_lock(&module_mutex);
+	/* Core kernel cond_calls */
+	printk("Listing kernel cond_calls\n");
+	found += _cond_call_list_range(name,
+			__start___cond_call, __stop___cond_call);
+	/* Cond_calls in modules. */
+	printk("Listing module cond_calls\n");
+	list_for_each_entry(mod, &modules, list) {
+		if (!mod->taints) {
+			printk("Listing cond_calls for module %s\n", mod->name);
+			found += _cond_call_list_range(name, mod->cond_calls,
+				mod->cond_calls+mod->num_cond_calls);
+		}
+	}
+	mutex_unlock(&module_mutex);
+	return found;
+}
+EXPORT_SYMBOL_GPL(cond_call_list);
+
+#endif
+
 #ifdef CONFIG_SMP
 /* Number of blocks used and allocated. */
 static unsigned int pcpu_num_used, pcpu_num_allocated;
@@ -1658,6 +1885,8 @@
 	unsigned int unusedcrcindex;
 	unsigned int unusedgplindex;
 	unsigned int unusedgplcrcindex;
+	unsigned int condcallindex;
+	unsigned int condcallstringsindex;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1754,6 +1983,8 @@
 #ifdef ARCH_UNWIND_SECTION_NAME
 	unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME);
 #endif
+	condcallindex = find_sec(hdr, sechdrs, secstrings, "__cond_call");
+	condcallstringsindex = find_sec(hdr, sechdrs, secstrings, "__cond_call_strings");
 
 	/* Don't keep modinfo section */
 	sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -1764,6 +1995,18 @@
 #endif
 	if (unwindex)
 		sechdrs[unwindex].sh_flags |= SHF_ALLOC;
+#ifdef CONFIG_COND_CALL
+	if (condcallindex)
+		sechdrs[condcallindex].sh_flags |= SHF_ALLOC;
+	if (condcallstringsindex)
+		sechdrs[condcallstringsindex].sh_flags |= SHF_ALLOC;
+#else
+	if (condcallindex)
+		sechdrs[condcallindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
+	if (condcallstringsindex)
+		sechdrs[condcallstringsindex].sh_flags
+					&= ~(unsigned long)SHF_ALLOC;
+#endif
 
 	/* Check module struct version now, before we try to use module. */
 	if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -1904,6 +2147,11 @@
 	mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
 	if (gplfuturecrcindex)
 		mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
+	if (condcallindex) {
+		mod->cond_calls = (void *)sechdrs[condcallindex].sh_addr;
+		mod->num_cond_calls =
+			sechdrs[condcallindex].sh_size / sizeof(*mod->cond_calls);
+	}
 
 	mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
 	if (unusedcrcindex)

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [patch 2/9] Conditional Calls - Hash Table
  2007-05-30 14:00 [patch 0/9] Conditional Calls - for 2.6.22-rc2-mm1 Mathieu Desnoyers
  2007-05-30 14:00 ` [patch 1/9] Conditional Calls - Architecture Independent Code Mathieu Desnoyers
@ 2007-05-30 14:00 ` Mathieu Desnoyers
  2007-05-30 20:32   ` Andrew Morton
  2007-05-31 13:42   ` Andi Kleen
  2007-05-30 14:00 ` [patch 3/9] Conditional Calls - Non Optimized Architectures Mathieu Desnoyers
                   ` (6 subsequent siblings)
  8 siblings, 2 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-30 14:00 UTC (permalink / raw)
  To: akpm, linux-kernel; +Cc: Mathieu Desnoyers

[-- Attachment #1: conditional-calls-hash-table.patch --]
[-- Type: text/plain, Size: 10654 bytes --]

Reimplementation of the cond calls which uses a hash table to hold the active
cond_calls. It permits first arming a cond_call and then loading supplementary
modules that contain this cond_call.

Without this, the order of loading a module containing a cond_call and arming
that cond_call matters, and there is no symbol dependency to check this.

At module load time, each cond_call checks if it is enabled in the hash table
and is set accordingly.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
 include/linux/condcall.h |    2 
 kernel/module.c          |  205 +++++++++++++++++++++++++++++------------------
 2 files changed, 129 insertions(+), 78 deletions(-)

Index: linux-2.6-lttng/kernel/module.c
===================================================================
--- linux-2.6-lttng.orig/kernel/module.c	2007-05-17 01:42:50.000000000 -0400
+++ linux-2.6-lttng/kernel/module.c	2007-05-17 01:46:42.000000000 -0400
@@ -33,6 +33,8 @@
 #include <linux/moduleparam.h>
 #include <linux/errno.h>
 #include <linux/condcall.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
 #include <linux/err.h>
 #include <linux/vermagic.h>
 #include <linux/notifier.h>
@@ -71,6 +73,20 @@
 
 static BLOCKING_NOTIFIER_HEAD(module_notify_list);
 
+#ifdef CONFIG_COND_CALL
+/* Conditional call hash table, containing the active cond_calls.
+ * Protected by module_mutex. */
+#define COND_CALL_HASH_BITS 6
+#define COND_CALL_TABLE_SIZE (1 << COND_CALL_HASH_BITS)
+
+struct cond_call_entry {
+	struct hlist_node hlist;
+	char name[0];
+};
+
+static struct hlist_head cond_call_table[COND_CALL_TABLE_SIZE];
+#endif //CONFIG_COND_CALL
+
 int register_module_notifier(struct notifier_block * nb)
 {
 	return blocking_notifier_chain_register(&module_notify_list, nb);
@@ -305,6 +321,68 @@
 }
 
 #ifdef CONFIG_COND_CALL
+/* Check if the cond_call is present in the hash table.
+ * Returns 1 if present, 0 if not. */
+static int hash_check_cond_call(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct cond_call_entry *e;
+	size_t len = strlen(name);
+	u32 hash = jhash(name, len, 0);
+
+	head = &cond_call_table[hash & ((1 << COND_CALL_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist)
+		if (!strcmp(name, e->name))
+			return 1;
+	return 0;
+}
+
+/* Add the cond_call to the hash table. Must be called with mutex_lock held. */
+static int hash_add_cond_call(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct cond_call_entry *e;
+	size_t len = strlen(name);
+	u32 hash = jhash(name, len, 0);
+
+	head = &cond_call_table[hash & ((1 << COND_CALL_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist)
+		if (!strcmp(name, e->name))
+			return 0;	/* Already there */
+	/* Using kmalloc here to allocate a variable length element. Could
+	 * cause some memory fragmentation if overused. */
+	e = kmalloc(sizeof(struct cond_call_entry) + len + 1, GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	memcpy(&e->name[0], name, len + 1);
+	hlist_add_head(&e->hlist, head);
+	return 0;
+}
+
+/* Remove the cond_call from the hash table. Must be called with mutex_lock
+ * held. */
+static void hash_remove_cond_call(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct cond_call_entry *e;
+	int found = 0;
+	size_t len = strlen(name);
+	u32 hash = jhash(name, len, 0);
+
+	head = &cond_call_table[hash & ((1 << COND_CALL_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist)
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			break;
+		}
+	if (found) {
+		hlist_del(&e->hlist);
+		kfree(e);
+	}
+}
 
 /* Set the enable bit of the cond_call, choosing the generic or architecture
  * specific functions depending on the cond_call's flags.
@@ -317,59 +395,53 @@
 		return cond_call_generic_set_enable(address, enable);
 }
 
-/* Query the state of a cond_calls range. */
-static int _cond_call_query_range(const char *name,
-	const struct __cond_call_struct *begin,
-	const struct __cond_call_struct *end)
+static int cond_call_get_enable(void *address, int flags)
+{
+	if (flags & CF_OPTIMIZED)
+		return COND_CALL_OPTIMIZED_ENABLE(address);
+	else
+		return COND_CALL_GENERIC_ENABLE(address);
+}
+
+/* Setup the cond_call according to the data present in the cond_call hash
+ * upon module load. */
+void module_cond_call_setup(struct module *mod)
 {
 	const struct __cond_call_struct *iter;
-	int ret = 0;
+	int enable;
 
-	for (iter = begin; iter < end; iter++) {
-		if (strcmp(name, iter->name) != 0)
-			continue;
-		if (iter->flags & CF_OPTIMIZED)
-			ret = COND_CALL_OPTIMIZED_ENABLE(iter->enable);
-		else
-			ret = COND_CALL_GENERIC_ENABLE(iter->enable);
-		if (ret)
-			break;
+	for (iter = mod->cond_calls;
+		iter < mod->cond_calls+mod->num_cond_calls; iter++) {
+		enable = hash_check_cond_call(iter->name);
+		if (enable != cond_call_get_enable(iter->enable, iter->flags))
+			cond_call_set_enable(iter->enable, enable, iter->flags);
 	}
-	return ret;
 }
 
 /* Sets a range of cond_calls to a enabled state : set the enable bit. */
-static int _cond_call_arm_range(const char *name,
+static void _cond_call_arm_range(const char *name,
 	const struct __cond_call_struct *begin,
 	const struct __cond_call_struct *end)
 {
 	const struct __cond_call_struct *iter;
-	int found = 0;
 
 	for (iter = begin; iter < end; iter++) {
-		if (strcmp(name, iter->name) != 0)
-			continue;
-		cond_call_set_enable(iter->enable, 1, iter->flags);
-		found++;
+		if (!strcmp(name, iter->name))
+			cond_call_set_enable(iter->enable, 1, iter->flags);
 	}
-	return found;
 }
 
 /* Sets a range of cond_calls to a disabled state : unset the enable bit. */
-static int _cond_call_disarm_range(const char *name,
+static void _cond_call_disarm_range(const char *name,
 	const struct __cond_call_struct *begin,
 	const struct __cond_call_struct *end)
 {
 	const struct __cond_call_struct *iter;
-	int found = 0;
 
 	for (iter = begin; iter < end; iter++) {
-		if (strcmp(name, iter->name) != 0)
-			continue;
-		cond_call_set_enable(iter->enable, 0, iter->flags);
-		found++;
+		if (!strcmp(name, iter->name))
+			cond_call_set_enable(iter->enable, 0, iter->flags);
 	}
-	return found;
 }
 
 /* Provides a listing of the cond_calls present in the kernel with their type
@@ -397,105 +469,79 @@
 	return found;
 }
 
-/* Calls _cond_call_query_range for the core cond_calls and modules cond_calls.
- */
-static int _cond_call_query(const char *name)
-{
-	struct module *mod;
-	int ret = 0;
-
-	/* Core kernel cond_calls */
-	ret = _cond_call_query_range(name,
-			__start___cond_call, __stop___cond_call);
-	if (ret)
-		return ret;
-	/* Cond_calls in modules. */
-	list_for_each_entry(mod, &modules, list) {
-		if (mod->taints)
-			continue;
-		ret = _cond_call_query_range(name,
-			mod->cond_calls, mod->cond_calls+mod->num_cond_calls);
-		if (ret)
-			break;
-	}
-	return ret;
-}
-
 /* Cond_call enabling/disabling use the module_mutex to synchronize.
- * Returns 1 if enabled, 0 if disabled or not present. */
+ * Returns 1 if enabled, 0 if disabled. */
 int cond_call_query(const char *name)
 {
-	int ret = 0;
+	int ret;
 
 	mutex_lock(&module_mutex);
-	ret = _cond_call_query(name);
+	ret = hash_check_cond_call(name);
 	mutex_unlock(&module_mutex);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(cond_call_query);
 
-/* Calls _cond_call_arm_range for the core cond_calls and modules cond_calls.
- * FIXME : cond_call will not be active on modules loaded later than the
- * cond_call_arm. */
+/* Calls _cond_call_arm_range for the core cond_calls and modules cond_calls. */
 static int _cond_call_arm(const char *name)
 {
 	struct module *mod;
-	int found = 0;
+	int ret;
+
+	ret = hash_add_cond_call(name);
+	if (ret)
+		return ret;
 
 	/* Core kernel cond_calls */
-	found += _cond_call_arm_range(name,
+	_cond_call_arm_range(name,
 			__start___cond_call, __stop___cond_call);
 	/* Cond_calls in modules. */
 	list_for_each_entry(mod, &modules, list) {
 		if (mod->taints)
 			continue;
-		found += _cond_call_arm_range(name,
+		_cond_call_arm_range(name,
 			mod->cond_calls, mod->cond_calls+mod->num_cond_calls);
 	}
-	return found;
+	return ret;
 }
 
 /* Cond_call enabling/disabling use the module_mutex to synchronize. */
 int cond_call_arm(const char *name)
 {
-	int found = 0;
+	int ret;
 
 	mutex_lock(&module_mutex);
-	found = _cond_call_arm(name);
+	ret = _cond_call_arm(name);
 	mutex_unlock(&module_mutex);
-	return found;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(cond_call_arm);
 
 /* Calls _cond_call_disarm_range for the core cond_calls and modules cond_calls.
  */
-static int _cond_call_disarm(const char *name)
+static void _cond_call_disarm(const char *name)
 {
 	struct module *mod;
-	int found = 0;
 
 	/* Core kernel cond_calls */
-	found += _cond_call_disarm_range(name,
+	_cond_call_disarm_range(name,
 			__start___cond_call, __stop___cond_call);
 	/* Cond_calls in modules. */
 	list_for_each_entry(mod, &modules, list) {
 		if (mod->taints)
 			continue;
-		found += _cond_call_disarm_range(name,
+		_cond_call_disarm_range(name,
 			mod->cond_calls, mod->cond_calls+mod->num_cond_calls);
 	}
-	return found;
+	hash_remove_cond_call(name);
 }
 
 /* Cond_call enabling/disabling use the module_mutex to synchronize. */
-int cond_call_disarm(const char *name)
+void cond_call_disarm(const char *name)
 {
-	int found = 0;
-
 	mutex_lock(&module_mutex);
-	found = _cond_call_disarm(name);
+	_cond_call_disarm(name);
 	mutex_unlock(&module_mutex);
-	return found;
 }
 EXPORT_SYMBOL_GPL(cond_call_disarm);
 
@@ -525,7 +571,10 @@
 	return found;
 }
 EXPORT_SYMBOL_GPL(cond_call_list);
-
+#else
+static void module_cond_call_setup(struct module *mod)
+{
+}
 #endif
 
 #ifdef CONFIG_SMP
@@ -2211,6 +2260,8 @@
 
 	add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
 
+	module_cond_call_setup(mod);
+
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
Index: linux-2.6-lttng/include/linux/condcall.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/condcall.h	2007-05-17 01:42:50.000000000 -0400
+++ linux-2.6-lttng/include/linux/condcall.h	2007-05-17 01:46:42.000000000 -0400
@@ -81,7 +81,7 @@
 #define COND_CALL_MAX_FORMAT_LEN	1024
 
 extern int cond_call_arm(const char *name);
-extern int cond_call_disarm(const char *name);
+extern void cond_call_disarm(const char *name);
 
 /* cond_call_query : Returns 1 if enabled, 0 if disabled or not present */
 extern int cond_call_query(const char *name);

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [patch 3/9] Conditional Calls - Non Optimized Architectures
  2007-05-30 14:00 [patch 0/9] Conditional Calls - for 2.6.22-rc2-mm1 Mathieu Desnoyers
  2007-05-30 14:00 ` [patch 1/9] Conditional Calls - Architecture Independent Code Mathieu Desnoyers
  2007-05-30 14:00 ` [patch 2/9] Conditional Calls - Hash Table Mathieu Desnoyers
@ 2007-05-30 14:00 ` Mathieu Desnoyers
  2007-05-30 14:00 ` [patch 4/9] Conditional Calls - Add kconfig menus Mathieu Desnoyers
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-30 14:00 UTC (permalink / raw)
  To: akpm, linux-kernel; +Cc: Mathieu Desnoyers

[-- Attachment #1: conditional-calls-non-optimized-architectures.patch --]
[-- Type: text/plain, Size: 9947 bytes --]

Architecture agnostic, generic, version of the cond_calls.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
 include/asm-alpha/condcall.h     |    1 +
 include/asm-arm/condcall.h       |    1 +
 include/asm-arm26/condcall.h     |    1 +
 include/asm-cris/condcall.h      |    1 +
 include/asm-frv/condcall.h       |    1 +
 include/asm-generic/condcall.h   |   25 +++++++++++++++++++++++++
 include/asm-h8300/condcall.h     |    1 +
 include/asm-i386/condcall.h      |    1 +
 include/asm-ia64/condcall.h      |    1 +
 include/asm-m32r/condcall.h      |    1 +
 include/asm-m68k/condcall.h      |    1 +
 include/asm-m68knommu/condcall.h |    1 +
 include/asm-mips/condcall.h      |    1 +
 include/asm-parisc/condcall.h    |    1 +
 include/asm-powerpc/condcall.h   |    1 +
 include/asm-ppc/condcall.h       |    1 +
 include/asm-s390/condcall.h      |    1 +
 include/asm-sh/condcall.h        |    1 +
 include/asm-sh64/condcall.h      |    1 +
 include/asm-sparc/condcall.h     |    1 +
 include/asm-sparc64/condcall.h   |    1 +
 include/asm-um/condcall.h        |    1 +
 include/asm-v850/condcall.h      |    1 +
 include/asm-x86_64/condcall.h    |    1 +
 include/asm-xtensa/condcall.h    |    1 +
 25 files changed, 49 insertions(+)

Index: linux-2.6-lttng/include/asm-generic/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-generic/condcall.h	2007-05-29 11:56:07.000000000 -0400
@@ -0,0 +1,25 @@
+#ifndef _ASM_GENERIC_CONDCALL_H
+#define _ASM_GENERIC_CONDCALL_H
+
+/* Default flags, used by _cond_call() */
+#define CF_DEFAULT			0
+
+/* Fallback on the generic cond_call, since no optimized version is available */
+#define cond_call_optimized		cond_call_generic
+#define _cond_call(flags, name, func) cond_call_generic(flags, name, func)
+
+/* cond_call with default behavior */
+#define cond_call(name, func) _cond_call(CF_DEFAULT, name, func)
+
+/* Architecture dependant cond_call information, used internally for cond_call
+ * activation. */
+
+#define COND_CALL_OPTIMIZED_ENABLE_IMMEDIATE_OFFSET \
+		COND_CALL_GENERIC_ENABLE_IMMEDIATE_OFFSET
+#define COND_CALL_OPTIMIZED_ENABLE_TYPE	COND_CALL_GENERIC_ENABLE_TYPE
+/* Dereference enable as lvalue from a pointer to its instruction */
+#define COND_CALL_OPTIMIZED_ENABLE	COND_CALL_GENERIC_ENABLE
+
+#define cond_call_optimized_set_enable cond_call_generic_set_enable
+
+#endif /* _ASM_GENERIC_MARKER_H */
Index: linux-2.6-lttng/include/asm-alpha/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-alpha/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-arm/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-arm/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-arm26/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-arm26/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-cris/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-cris/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-frv/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-frv/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-h8300/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-h8300/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-ia64/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-ia64/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-m32r/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-m32r/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-m68k/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-m68k/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-m68knommu/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-m68knommu/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-mips/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-mips/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-parisc/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-parisc/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-ppc/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-ppc/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-s390/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-s390/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-sh/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-sh/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-sh64/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-sh64/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-sparc/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-sparc/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-sparc64/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-sparc64/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-um/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-um/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-v850/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-v850/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-x86_64/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-x86_64/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-xtensa/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-xtensa/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-i386/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-i386/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>
Index: linux-2.6-lttng/include/asm-powerpc/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-powerpc/condcall.h	2007-05-29 11:55:58.000000000 -0400
@@ -0,0 +1 @@
+#include <asm-generic/condcall.h>

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [patch 4/9] Conditional Calls - Add kconfig menus
  2007-05-30 14:00 [patch 0/9] Conditional Calls - for 2.6.22-rc2-mm1 Mathieu Desnoyers
                   ` (2 preceding siblings ...)
  2007-05-30 14:00 ` [patch 3/9] Conditional Calls - Non Optimized Architectures Mathieu Desnoyers
@ 2007-05-30 14:00 ` Mathieu Desnoyers
  2007-05-30 14:00 ` [patch 5/9] Conditional Calls - i386 Optimization Mathieu Desnoyers
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-30 14:00 UTC (permalink / raw)
  To: akpm, linux-kernel; +Cc: Mathieu Desnoyers, Adrian Bunk, Andi Kleen

[-- Attachment #1: conditional-calls-kconfig-menus.patch --]
[-- Type: text/plain, Size: 14381 bytes --]

Conditional calls provide a way to compile in kernels features that can be
enabled dynamically, with a very small footprint when disabled.

This patch:

Add Kconfig menus for the conditional calls code.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Adrian Bunk <bunk@stusta.de>
CC: Andi Kleen <andi@firstfloor.org>
---

 arch/alpha/Kconfig       |    6 ++++++
 arch/arm/Kconfig         |    6 ++++++
 arch/arm26/Kconfig       |    6 ++++++
 arch/avr32/Kconfig.debug |    7 +++++++
 arch/cris/Kconfig        |    6 ++++++
 arch/frv/Kconfig         |    6 ++++++
 arch/h8300/Kconfig       |    6 ++++++
 arch/i386/Kconfig        |    2 ++
 arch/ia64/Kconfig        |    3 +++
 arch/m32r/Kconfig        |    6 ++++++
 arch/m68k/Kconfig        |    6 ++++++
 arch/m68knommu/Kconfig   |    6 ++++++
 arch/mips/Kconfig        |    6 ++++++
 arch/parisc/Kconfig      |    6 ++++++
 arch/powerpc/Kconfig     |    3 +++
 arch/ppc/Kconfig         |    6 ++++++
 arch/s390/Kconfig        |    2 ++
 arch/sh/Kconfig          |    6 ++++++
 arch/sh64/Kconfig        |    6 ++++++
 arch/sparc/Kconfig       |    2 ++
 arch/sparc64/Kconfig     |    3 +++
 arch/um/Kconfig          |    6 ++++++
 arch/v850/Kconfig        |    6 ++++++
 arch/x86_64/Kconfig      |    3 +++
 arch/xtensa/Kconfig      |    6 ++++++
 kernel/Kconfig.condcall  |   20 ++++++++++++++++++++
 26 files changed, 147 insertions(+)

Index: linux-2.6-lttng/arch/alpha/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/alpha/Kconfig	2007-05-30 08:42:52.000000000 -0400
+++ linux-2.6-lttng/arch/alpha/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -653,6 +653,12 @@
 
 source "arch/alpha/oprofile/Kconfig"
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/alpha/Kconfig.debug"
 
 # DUMMY_CONSOLE may be defined in drivers/video/console/Kconfig
Index: linux-2.6-lttng/arch/arm/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/arm/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/arm/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -1042,6 +1042,12 @@
 
 source "arch/arm/oprofile/Kconfig"
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/arm/Kconfig.debug"
 
 source "security/Kconfig"
Index: linux-2.6-lttng/arch/arm26/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/arm26/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/arm26/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -241,6 +241,12 @@
 
 source "drivers/usb/Kconfig"
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/arm26/Kconfig.debug"
 
 source "security/Kconfig"
Index: linux-2.6-lttng/arch/cris/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/cris/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/cris/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -198,6 +198,12 @@
 
 source "drivers/usb/Kconfig"
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/cris/Kconfig.debug"
 
 source "security/Kconfig"
Index: linux-2.6-lttng/arch/frv/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/frv/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/frv/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -375,6 +375,12 @@
 
 source "fs/Kconfig"
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/frv/Kconfig.debug"
 
 source "security/Kconfig"
Index: linux-2.6-lttng/arch/h8300/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/h8300/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/h8300/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -220,6 +220,12 @@
 
 source "fs/Kconfig"
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/h8300/Kconfig.debug"
 
 source "security/Kconfig"
Index: linux-2.6-lttng/arch/i386/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/i386/Kconfig	2007-05-30 08:42:53.000000000 -0400
+++ linux-2.6-lttng/arch/i386/Kconfig	2007-05-30 08:44:45.000000000 -0400
@@ -1233,6 +1233,8 @@
 	  for kernel debugging, non-intrusive instrumentation and testing.
 	  If in doubt, say "N".
 
+source "kernel/Kconfig.condcall"
+
 endif # INSTRUMENTATION
 
 source "arch/i386/Kconfig.debug"
Index: linux-2.6-lttng/arch/ia64/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/ia64/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/ia64/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -590,6 +590,9 @@
 	  a probepoint and specifies the callback.  Kprobes is useful
 	  for kernel debugging, non-intrusive instrumentation and testing.
 	  If in doubt, say "N".
+
+source "kernel/Kconfig.condcall"
+
 endmenu
 
 source "arch/ia64/Kconfig.debug"
Index: linux-2.6-lttng/arch/m32r/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/m32r/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/m32r/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -405,6 +405,12 @@
 
 source "arch/m32r/oprofile/Kconfig"
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/m32r/Kconfig.debug"
 
 source "security/Kconfig"
Index: linux-2.6-lttng/arch/m68k/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/m68k/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/m68k/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -670,6 +670,12 @@
 
 source "fs/Kconfig"
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/m68k/Kconfig.debug"
 
 source "security/Kconfig"
Index: linux-2.6-lttng/arch/m68knommu/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/m68knommu/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/m68knommu/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -668,6 +668,12 @@
 
 source "fs/Kconfig"
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/m68knommu/Kconfig.debug"
 
 source "security/Kconfig"
Index: linux-2.6-lttng/arch/mips/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/mips/Kconfig	2007-05-30 08:42:36.000000000 -0400
+++ linux-2.6-lttng/arch/mips/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -1957,6 +1957,12 @@
 
 source "arch/mips/oprofile/Kconfig"
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/mips/Kconfig.debug"
 
 source "security/Kconfig"
Index: linux-2.6-lttng/arch/parisc/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/parisc/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/parisc/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -269,6 +269,12 @@
 
 source "arch/parisc/oprofile/Kconfig"
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/parisc/Kconfig.debug"
 
 source "security/Kconfig"
Index: linux-2.6-lttng/arch/powerpc/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/powerpc/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/powerpc/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -903,6 +903,9 @@
 	  a probepoint and specifies the callback.  Kprobes is useful
 	  for kernel debugging, non-intrusive instrumentation and testing.
 	  If in doubt, say "N".
+
+source "kernel/Kconfig.condcall"
+
 endmenu
 
 source "arch/powerpc/Kconfig.debug"
Index: linux-2.6-lttng/arch/ppc/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/ppc/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/ppc/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -1451,8 +1451,14 @@
 
 source "lib/Kconfig"
 
+menu "Instrumentation Support"
+
 source "arch/powerpc/oprofile/Kconfig"
 
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/ppc/Kconfig.debug"
 
 source "security/Kconfig"
Index: linux-2.6-lttng/arch/s390/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/s390/Kconfig	2007-05-30 08:43:05.000000000 -0400
+++ linux-2.6-lttng/arch/s390/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -549,6 +549,8 @@
 
 source "lib/Kconfig.statistic"
 
+source "kernel/Kconfig.condcall"
+
 endmenu
 
 source "arch/s390/Kconfig.debug"
Index: linux-2.6-lttng/arch/sh/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/sh/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/sh/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -721,6 +721,12 @@
 
 source "arch/sh/oprofile/Kconfig"
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/sh/Kconfig.debug"
 
 source "security/Kconfig"
Index: linux-2.6-lttng/arch/sh64/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/sh64/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/sh64/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -284,6 +284,12 @@
 
 source "arch/sh64/oprofile/Kconfig"
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/sh64/Kconfig.debug"
 
 source "security/Kconfig"
Index: linux-2.6-lttng/arch/sparc/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/sparc/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/sparc/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -306,6 +306,8 @@
 
 source "arch/sparc/oprofile/Kconfig"
 
+source "kernel/Kconfig.condcall"
+
 endmenu
 
 source "arch/sparc/Kconfig.debug"
Index: linux-2.6-lttng/arch/sparc64/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/sparc64/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/sparc64/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -438,6 +438,9 @@
 	  a probepoint and specifies the callback.  Kprobes is useful
 	  for kernel debugging, non-intrusive instrumentation and testing.
 	  If in doubt, say "N".
+
+source "kernel/Kconfig.condcall"
+
 endmenu
 
 source "arch/sparc64/Kconfig.debug"
Index: linux-2.6-lttng/arch/um/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/um/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/um/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -336,4 +336,10 @@
 	bool
 	default n
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/um/Kconfig.debug"
Index: linux-2.6-lttng/arch/v850/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/v850/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/v850/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -331,6 +331,12 @@
 
 source "drivers/usb/Kconfig"
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/v850/Kconfig.debug"
 
 source "security/Kconfig"
Index: linux-2.6-lttng/arch/x86_64/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/x86_64/Kconfig	2007-05-30 08:42:48.000000000 -0400
+++ linux-2.6-lttng/arch/x86_64/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -794,6 +794,9 @@
 	  a probepoint and specifies the callback.  Kprobes is useful
 	  for kernel debugging, non-intrusive instrumentation and testing.
 	  If in doubt, say "N".
+
+source "kernel/Kconfig.condcall"
+
 endmenu
 
 source "arch/x86_64/Kconfig.debug"
Index: linux-2.6-lttng/arch/xtensa/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/xtensa/Kconfig	2007-05-30 08:42:35.000000000 -0400
+++ linux-2.6-lttng/arch/xtensa/Kconfig	2007-05-30 08:43:43.000000000 -0400
@@ -251,6 +251,12 @@
 	  provide one yourself.
 endmenu
 
+menu "Instrumentation Support"
+
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 source "arch/xtensa/Kconfig.debug"
 
 source "security/Kconfig"
Index: linux-2.6-lttng/kernel/Kconfig.condcall
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/kernel/Kconfig.condcall	2007-05-30 08:43:43.000000000 -0400
@@ -0,0 +1,20 @@
+# Conditional calls configuration
+
+config COND_CALL
+	bool "Activate conditional calls"
+	depends on MODULES
+	help
+	  Provides a way to dynamically enable kernel features while having a
+	  very small footprint when disabled.
+
+config COND_CALL_DISABLE_OPTIMIZATION
+	bool "Disable conditional calls optimization"
+	depends on COND_CALL && EMBEDDED
+	default n
+	help
+	  Disable code replacement jump optimizations. Especially useful if your
+	  code is in a read-only ROM/flash.
+
+config COND_CALL_ENABLE_OPTIMIZATION
+	def_bool y
+	depends on COND_CALL && !COND_CALL_DISABLE_OPTIMIZATION
Index: linux-2.6-lttng/arch/avr32/Kconfig.debug
===================================================================
--- linux-2.6-lttng.orig/arch/avr32/Kconfig.debug	2007-05-30 08:28:44.000000000 -0400
+++ linux-2.6-lttng/arch/avr32/Kconfig.debug	2007-05-30 08:43:43.000000000 -0400
@@ -6,6 +6,9 @@
 
 source "lib/Kconfig.debug"
 
+menu "Instrumentation Support"
+	depends on EXPERIMENTAL
+
 config KPROBES
 	bool "Kprobes"
 	depends on DEBUG_KERNEL
@@ -16,4 +19,8 @@
           for kernel debugging, non-intrusive instrumentation and testing.
           If in doubt, say "N".
 
+source "kernel/Kconfig.condcall"
+
+endmenu
+
 endmenu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [patch 5/9] Conditional Calls - i386 Optimization
  2007-05-30 14:00 [patch 0/9] Conditional Calls - for 2.6.22-rc2-mm1 Mathieu Desnoyers
                   ` (3 preceding siblings ...)
  2007-05-30 14:00 ` [patch 4/9] Conditional Calls - Add kconfig menus Mathieu Desnoyers
@ 2007-05-30 14:00 ` Mathieu Desnoyers
  2007-05-30 20:33   ` Andrew Morton
  2007-05-31 13:54   ` Andi Kleen
  2007-05-30 14:00 ` [patch 6/9] Conditional Calls - PowerPC Optimization Mathieu Desnoyers
                   ` (3 subsequent siblings)
  8 siblings, 2 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-30 14:00 UTC (permalink / raw)
  To: akpm, linux-kernel; +Cc: Mathieu Desnoyers

[-- Attachment #1: conditional-calls-i386-optimization.patch --]
[-- Type: text/plain, Size: 9294 bytes --]

i386 optimization of the cond_calls which uses a movb with code patching to
set/unset the value used to populate the register used for the branch test.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
 arch/i386/kernel/Makefile   |    1 
 arch/i386/kernel/condcall.c |  140 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-i386/condcall.h |   69 +++++++++++++++++++++
 3 files changed, 209 insertions(+), 1 deletion(-)

Index: linux-2.6-lttng/include/asm-i386/condcall.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-i386/condcall.h	2007-05-17 01:49:46.000000000 -0400
+++ linux-2.6-lttng/include/asm-i386/condcall.h	2007-05-17 01:52:38.000000000 -0400
@@ -1 +1,68 @@
-#include <asm-generic/condcall.h>
+#ifndef _ASM_I386_CONDCALL_H
+#define _ASM_I386_CONDCALL_H
+
+/*
+ * Conditional function calls. i386 architecture optimizations.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+
+#ifdef CONFIG_COND_CALL
+
+#define CF_DEFAULT (CF_OPTIMIZED | CF_LOCKDEP | CF_PRINTK)
+
+/* Optimized version of the cond_call. Passing the flags as a pointer to
+ * the inline assembly to trick it into putting the flags value as third
+ * parameter in the structure. */
+#define cond_call_optimized(flags, name, func) \
+	({ \
+		static const char __cstrtab_name_##name[] \
+		__attribute__((section("__cond_call_strings"))) = #name; \
+		char condition; \
+		asm (	".section __cond_call, \"a\", @progbits;\n\t" \
+					".long %1, 0f, %2;\n\t" \
+					".previous;\n\t" \
+					".align 2\n\t" \
+					"0:\n\t" \
+					"movb %3,%0;\n\t" \
+				: "=r" (condition) \
+				: "m" (*__cstrtab_name_##name), \
+				  "m" (*(char*)flags), \
+				  "i" ((flags) & CF_STATIC_ENABLE)); \
+		(likely(!condition)) ? \
+			(__typeof__(func))0 : \
+			(func); \
+	})
+
+/* cond_call macro selecting the generic or optimized version of cond_call,
+ * depending on the flags specified. */
+#define _cond_call(flags, name, func) \
+({ \
+	(((flags) & CF_LOCKDEP) && ((flags) & CF_OPTIMIZED)) ? \
+		cond_call_optimized(flags, name, func) : \
+		cond_call_generic(flags, name, func); \
+})
+
+/* cond_call with default behavior */
+#define cond_call(name, func) _cond_call(CF_DEFAULT, name, func)
+
+/* Architecture dependant cond_call information, used internally for cond_call
+ * activation. */
+
+/* Offset of the immediate value from the start of the movb instruction, in
+ * bytes. */
+#define COND_CALL_OPTIMIZED_ENABLE_IMMEDIATE_OFFSET 1
+#define COND_CALL_OPTIMIZED_ENABLE_TYPE unsigned char
+/* Dereference enable as lvalue from a pointer to its instruction */
+#define COND_CALL_OPTIMIZED_ENABLE(a) \
+	*(COND_CALL_OPTIMIZED_ENABLE_TYPE*) \
+		((char*)a+COND_CALL_OPTIMIZED_ENABLE_IMMEDIATE_OFFSET)
+
+extern int cond_call_optimized_set_enable(void *address, char enable);
+
+#endif
+#endif //_ASM_I386_CONDCALL_H
Index: linux-2.6-lttng/arch/i386/kernel/Makefile
===================================================================
--- linux-2.6-lttng.orig/arch/i386/kernel/Makefile	2007-05-17 01:42:46.000000000 -0400
+++ linux-2.6-lttng/arch/i386/kernel/Makefile	2007-05-17 01:52:38.000000000 -0400
@@ -33,6 +33,7 @@
 obj-y				+= sysenter.o vsyscall.o
 obj-$(CONFIG_ACPI_SRAT) 	+= srat.o
 obj-$(CONFIG_EFI) 		+= efi.o efi_stub.o
+obj-$(CONFIG_COND_CALL_ENABLE_OPTIMIZATION)	+= condcall.o
 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault.o
 obj-$(CONFIG_SERIAL_8250)	+= legacy_serial.o
 obj-$(CONFIG_VM86)		+= vm86.o
Index: linux-2.6-lttng/arch/i386/kernel/condcall.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/arch/i386/kernel/condcall.c	2007-05-17 01:52:38.000000000 -0400
@@ -0,0 +1,140 @@
+/* condcall.c
+ *
+ * - Erratum 49 fix for Intel PIII.
+ * - Still present on newer processors : Intel Core 2 Duo Processor for Intel
+ *   Centrino Duo Processor Technology Specification Update, AH33.
+ *   Unsynchronized Cross-Modifying Code Operations Can Cause Unexpected
+ *   Instruction Execution Results.
+ *
+ * Permits cond_call activation by XMC with correct serialization.
+ *
+ * Reentrant for NMI and trap handler instrumentation. Permits XMC to a
+ * location that has preemption enabled because it involves no temporary or
+ * reused data structure.
+ *
+ * Quoting Richard J Moore, source of the information motivating this
+ * implementation which differs from the one proposed by Intel which is not
+ * suitable for kernel context (does not support NMI and would require disabling
+ * interrupts on every CPU for a long period) :
+ *
+ * "There is another issue to consider when looking into using probes other
+ * than int3:
+ *
+ * Intel erratum 54 - Unsynchronized Cross-modifying code - refers to the
+ * practice of modifying code on one processor where another has prefetched
+ * the unmodified version of the code. Intel states that unpredictable general
+ * protection faults may result if a synchronizing instruction (iret, int,
+ * int3, cpuid, etc ) is not executed on the second processor before it
+ * executes the pre-fetched out-of-date copy of the instruction.
+ *
+ * When we became aware of this I had a long discussion with Intel's
+ * microarchitecture guys. It turns out that the reason for this erratum
+ * (which incidentally Intel does not intend to fix) is because the trace
+ * cache - the stream of micorops resulting from instruction interpretation -
+ * cannot guaranteed to be valid. Reading between the lines I assume this
+ * issue arises because of optimization done in the trace cache, where it is
+ * no longer possible to identify the original instruction boundaries. If the
+ * CPU discovers that the trace cache has been invalidated because of
+ * unsynchronized cross-modification then instruction execution will be
+ * aborted with a GPF. Further discussion with Intel revealed that replacing
+ * the first opcode byte with an int3 would not be subject to this erratum.
+ *
+ * So, is cmpxchg reliable? One has to guarantee more than mere atomicity."
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ */
+
+#include <linux/notifier.h>
+#include <linux/mutex.h>
+#include <linux/preempt.h>
+#include <linux/smp.h>
+#include <linux/notifier.h>
+#include <linux/module.h>
+#include <linux/condcall.h>
+#include <linux/kdebug.h>
+
+#include <asm/cacheflush.h>
+
+#define BREAKPOINT_INSTRUCTION  0xcc
+#define BREAKPOINT_INS_LEN 1
+
+static DEFINE_MUTEX(cond_call_mutex);
+static long target_eip = 0;
+
+static void cond_call_synchronize_core(void *info)
+{
+	sync_core();	/* use cpuid to stop speculative execution */
+}
+
+/* The eip value points right after the breakpoint instruction, in the second
+ * byte of the movb. Incrementing it of 1 byte makes the code resume right after
+ * the movb instruction, effectively skipping this instruction.
+ *
+ * We simply skip the 2 bytes load immediate here, leaving the register in an
+ * undefined state. We don't care about the content (0 or !0), because we are
+ * changing the value 0->1 or 1->0. This small window of undefined value
+ * doesn't matter.
+ */
+static int cond_call_notifier(struct notifier_block *nb,
+	unsigned long val, void *data)
+{
+	enum die_val die_val = (enum die_val) val;
+	struct die_args *args = (struct die_args *)data;
+
+	if (!args->regs || user_mode_vm(args->regs))
+		return NOTIFY_DONE;
+
+	if (die_val == DIE_INT3	&& args->regs->eip == target_eip) {
+		args->regs->eip += 1; /* Skip the next byte of load immediate */
+		return NOTIFY_STOP;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block cond_call_notify = {
+	.notifier_call = cond_call_notifier,
+	.priority = 0x7fffffff,	/* we need to be notified first */
+};
+
+int cond_call_optimized_set_enable(void *address, char enable)
+{
+	char saved_byte;
+	int ret;
+	char *dest = address;
+
+	mutex_lock(&cond_call_mutex);
+
+	if (!(enable ^ dest[1])) /* Must be a state change 0<->1 to execute */
+		goto end;
+
+	target_eip = (long)address + BREAKPOINT_INS_LEN;
+	/* register_die_notifier has memory barriers */
+	register_die_notifier(&cond_call_notify);
+	saved_byte = *dest;
+	*dest = BREAKPOINT_INSTRUCTION;
+	wmb();
+	/* Execute serializing instruction on each CPU.
+	 * Acts as a memory barrier. */
+	ret = on_each_cpu(cond_call_synchronize_core, NULL, 1, 1);
+	BUG_ON(ret != 0);
+
+	dest[1] = enable;
+	wmb();
+	*dest = saved_byte;
+		/* Wait for all int3 handlers to end
+		   (interrupts are disabled in int3).
+		   This CPU is clearly not in a int3 handler,
+		   because int3 handler is not preemptible and
+		   there cannot be any more int3 handler called
+		   for this site, because we placed the original
+		   instruction back.
+		   synchronize_sched has memory barriers */
+	synchronize_sched();
+	unregister_die_notifier(&cond_call_notify);
+	/* unregister_die_notifier has memory barriers */
+	target_eip = 0;
+end:
+	mutex_unlock(&cond_call_mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cond_call_optimized_set_enable);

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [patch 6/9] Conditional Calls - PowerPC Optimization
  2007-05-30 14:00 [patch 0/9] Conditional Calls - for 2.6.22-rc2-mm1 Mathieu Desnoyers
                   ` (4 preceding siblings ...)
  2007-05-30 14:00 ` [patch 5/9] Conditional Calls - i386 Optimization Mathieu Desnoyers
@ 2007-05-30 14:00 ` Mathieu Desnoyers
  2007-05-30 14:00 ` [patch 7/9] Conditional Calls - Documentation Mathieu Desnoyers
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-30 14:00 UTC (permalink / raw)
  To: akpm, linux-kernel; +Cc: Mathieu Desnoyers

[-- Attachment #1: conditional-calls-powerpc-optimization.patch --]
[-- Type: text/plain, Size: 2965 bytes --]

PowerPC optimization of the cond_calls which uses a li instruction, patched with
an immediate value 0 or 1 to enable/disable the cond_call.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
 include/asm-powerpc/condcall.h |   68 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 67 insertions(+), 1 deletion(-)

Index: linux-2.6-lttng/include/asm-powerpc/condcall.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-powerpc/condcall.h	2007-05-29 11:55:58.000000000 -0400
+++ linux-2.6-lttng/include/asm-powerpc/condcall.h	2007-05-29 11:56:43.000000000 -0400
@@ -1 +1,67 @@
-#include <asm-generic/condcall.h>
+#ifndef _ASM_POWERPC_CONDCALL_H
+#define _ASM_POWERPC_CONDCALL_H
+
+/*
+ * Conditional function calls. PowerPC architecture optimizations.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <asm/asm-compat.h>
+
+#ifdef CONFIG_COND_CALL
+
+#define CF_DEFAULT (CF_OPTIMIZED | CF_LOCKDEP | CF_PRINTK)
+
+/* Optimized version of the cond_call */
+#define cond_call_optimized(flags, name, func) \
+	({ \
+		static const char __cstrtab_name_##name[] \
+		__attribute__((section("__cond_call_strings"))) = #name; \
+		char condition; \
+		asm (	".section __cond_call, \"a\", @progbits;\n\t" \
+					PPC_LONG "%1, 0f, %2;\n\t" \
+					".previous;\n\t" \
+					".align 4\n\t" \
+					"0:\n\t" \
+					"li %0,%3;\n\t" \
+				: "=r" (condition) \
+				: "i" (__cstrtab_name_##name), \
+				  "i" (flags), \
+				  "i" ((flags) & CF_STATIC_ENABLE)); \
+		(unlikely(condition)) ? \
+			(func) : \
+			(__typeof__(func))0; \
+	})
+
+/* cond_call macro selecting the generic or optimized version of cond_call,
+ * depending on the flags specified. */
+#define _cond_call(flags, name, func) \
+({ \
+	((flags) & CF_OPTIMIZED) ? \
+		cond_call_optimized(flags, name, func) : \
+		cond_call_generic(flags, name, func); \
+})
+
+/* cond_call with default behavior */
+#define cond_call(name, func) _cond_call(CF_DEFAULT, name, func)
+
+/* Architecture dependant cond_call information, used internally for cond_call
+ * activation. */
+
+/* Offset of the immediate value from the start of the addi instruction (result
+ * of the li mnemonic), in bytes. */
+#define COND_CALL_OPTIMIZED_ENABLE_IMMEDIATE_OFFSET 2
+#define COND_CALL_OPTIMIZED_ENABLE_TYPE unsigned short
+/* Dereference enable as lvalue from a pointer to its instruction */
+#define COND_CALL_OPTIMIZED_ENABLE(a) \
+	*(COND_CALL_OPTIMIZED_ENABLE_TYPE*) \
+		((char*)a+COND_CALL_OPTIMIZED_ENABLE_IMMEDIATE_OFFSET)
+
+extern int cond_call_optimized_set_enable(void *address, char enable);
+
+#endif
+#endif //_ASM_POWERPC_CONDCALL_H

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [patch 7/9] Conditional Calls - Documentation
  2007-05-30 14:00 [patch 0/9] Conditional Calls - for 2.6.22-rc2-mm1 Mathieu Desnoyers
                   ` (5 preceding siblings ...)
  2007-05-30 14:00 ` [patch 6/9] Conditional Calls - PowerPC Optimization Mathieu Desnoyers
@ 2007-05-30 14:00 ` Mathieu Desnoyers
  2007-05-30 14:00 ` [patch 8/9] F00F bug fixup for i386 - use conditional calls Mathieu Desnoyers
  2007-05-30 14:00 ` [patch 9/9] Scheduler profiling - Use " Mathieu Desnoyers
  8 siblings, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-30 14:00 UTC (permalink / raw)
  To: akpm, linux-kernel; +Cc: Mathieu Desnoyers

[-- Attachment #1: conditional-calls-documentation.patch --]
[-- Type: text/plain, Size: 2816 bytes --]

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
 Documentation/condcall.txt |   60 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

Index: linux-2.6-lttng/Documentation/condcall.txt
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/Documentation/condcall.txt	2007-05-17 01:55:05.000000000 -0400
@@ -0,0 +1,60 @@
+		        Using the Conditional Calls
+
+			    Mathieu Desnoyers
+
+
+This document introduces Conditional Calls and their use.
+
+* Purpose of conditional calls
+
+A conditional call is used to compile into the kernel a function call that is
+disabled at compile-time. Then, at runtime, it can be enabled dynamically. As
+explained below, the opposite can also be done.
+
+
+* Usage
+
+In order to use the macro cond_call, you should include linux/condcall.h.
+
+#include <linux/condcall.h>
+
+Add, in your code :
+
+ret = cond_call(examplecondcall, myfunc(a, b));
+
+Where :
+- examplecondcall is the conditional call identifier
+- myfunc(a, b) is a call to myfunc with valid parameters.
+- "ret" is, optionally, the variable in which the return value of myfunc() must
+  be put. If the conditional call is disabled, ret will be set to 0 casted to
+  the type of the return value of myfunc().
+
+Use cond_call_arm("examplecondcall") to activate the conditional call.
+
+Use cond_call_disarm("examplecondcall") to deactivate the conditional call.
+
+Use cond_call_query("examplecondcall") to query the conditional call state.
+
+The cond_call mechanism supports inserting multiple instances of the same
+cond_call. Conditional calls can be put in inline functions, inlined static
+functions, and unrolled loops.
+
+
+* Optimization for a given architecture
+
+One can implement optimized conditional calls for a given architecture by
+replacing asm-$ARCH/condcall.h.
+
+The CF_* flags can be used to control the type of conditional call. See the
+include/linux/condcall.h header for the list of flags. They can be specified as
+the first parameter of the _cond_call() macro, as in the following example,
+which declares a cond_call enabled statically (which can be disabled/reenabled
+dynamically)
+
+ret = _cond_call(CF_DEFAULT | CF_STATIC_ENABLE, examplecondcall, myfunc(a, b));
+
+Another example of flag usage is to declare a cond_call that always uses the
+generic version of the cond_calls. It can be useful to use this when cond_calls
+are placed in kernel code presenting particular reentrancy challenges.
+
+ret = _cond_call(CF_DEFAULT & ~CF_OPTIMIZED, examplecondcall, myfunc(a, b));

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [patch 8/9] F00F bug fixup for i386 - use conditional calls
  2007-05-30 14:00 [patch 0/9] Conditional Calls - for 2.6.22-rc2-mm1 Mathieu Desnoyers
                   ` (6 preceding siblings ...)
  2007-05-30 14:00 ` [patch 7/9] Conditional Calls - Documentation Mathieu Desnoyers
@ 2007-05-30 14:00 ` Mathieu Desnoyers
  2007-05-30 20:33   ` Andrew Morton
  2007-05-30 14:00 ` [patch 9/9] Scheduler profiling - Use " Mathieu Desnoyers
  8 siblings, 1 reply; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-30 14:00 UTC (permalink / raw)
  To: akpm, linux-kernel; +Cc: Mathieu Desnoyers

[-- Attachment #1: f00f-bug-use-cond-calls.patch --]
[-- Type: text/plain, Size: 2891 bytes --]

Use the faster conditional calls for F00F bug handling in do_page_fault.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>

---
 arch/i386/Kconfig.cpu    |    1 +
 arch/i386/kernel/traps.c |    2 ++
 arch/i386/mm/fault.c     |   35 ++++++++++++++++++++++-------------
 3 files changed, 25 insertions(+), 13 deletions(-)

Index: linux-2.6-lttng/arch/i386/kernel/traps.c
===================================================================
--- linux-2.6-lttng.orig/arch/i386/kernel/traps.c	2007-05-29 11:05:30.000000000 -0400
+++ linux-2.6-lttng/arch/i386/kernel/traps.c	2007-05-29 11:07:07.000000000 -0400
@@ -31,6 +31,7 @@
 #include <linux/uaccess.h>
 #include <linux/nmi.h>
 #include <linux/bug.h>
+#include <linux/condcall.h>
 
 #ifdef CONFIG_EISA
 #include <linux/ioport.h>
@@ -1084,6 +1085,7 @@
 	 */
 	idt_descr.address = fix_to_virt(FIX_F00F_IDT);
 	load_idt(&idt_descr);
+	BUG_ON(cond_call_arm("fix_f00f"));
 }
 #endif
 
Index: linux-2.6-lttng/arch/i386/mm/fault.c
===================================================================
--- linux-2.6-lttng.orig/arch/i386/mm/fault.c	2007-05-29 11:05:48.000000000 -0400
+++ linux-2.6-lttng/arch/i386/mm/fault.c	2007-05-29 11:13:16.000000000 -0400
@@ -25,6 +25,7 @@
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
 #include <linux/kdebug.h>
+#include <linux/condcall.h>
 
 #include <asm/system.h>
 #include <asm/desc.h>
@@ -221,6 +222,25 @@
 
 fastcall void do_invalid_op(struct pt_regs *, unsigned long);
 
+#ifdef CONFIG_X86_F00F_BUG
+/*
+ * Pentium F0 0F C7 C8 bug workaround.
+ */
+static inline int do_f00f_workaround(struct pt_regs *regs,
+		unsigned long address)
+{
+	unsigned long nr;
+
+	nr = (address - idt_descr.address) >> 3;
+
+	if (nr == 6) {
+		do_invalid_op(regs, 0);
+		return 1;
+	}
+	return 0;
+}
+#endif
+
 static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 {
 	unsigned index = pgd_index(address);
@@ -474,19 +494,8 @@
 	}
 
 #ifdef CONFIG_X86_F00F_BUG
-	/*
-	 * Pentium F0 0F C7 C8 bug workaround.
-	 */
-	if (boot_cpu_data.f00f_bug) {
-		unsigned long nr;
-		
-		nr = (address - idt_descr.address) >> 3;
-
-		if (nr == 6) {
-			do_invalid_op(regs, 0);
-			return;
-		}
-	}
+	if (cond_call(fix_f00f, do_f00f_workaround(regs, address)))
+		return;
 #endif
 
 no_context:
Index: linux-2.6-lttng/arch/i386/Kconfig.cpu
===================================================================
--- linux-2.6-lttng.orig/arch/i386/Kconfig.cpu	2007-05-29 11:51:46.000000000 -0400
+++ linux-2.6-lttng/arch/i386/Kconfig.cpu	2007-05-29 11:52:08.000000000 -0400
@@ -275,6 +275,7 @@
 config X86_F00F_BUG
 	bool
 	depends on M586MMX || M586TSC || M586 || M486 || M386
+	select COND_CALL
 	default y
 
 config X86_WP_WORKS_OK

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [patch 9/9] Scheduler profiling - Use conditional calls
  2007-05-30 14:00 [patch 0/9] Conditional Calls - for 2.6.22-rc2-mm1 Mathieu Desnoyers
                   ` (7 preceding siblings ...)
  2007-05-30 14:00 ` [patch 8/9] F00F bug fixup for i386 - use conditional calls Mathieu Desnoyers
@ 2007-05-30 14:00 ` Mathieu Desnoyers
  2007-05-30 20:34   ` Andrew Morton
                     ` (3 more replies)
  8 siblings, 4 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-30 14:00 UTC (permalink / raw)
  To: akpm, linux-kernel; +Cc: Mathieu Desnoyers

[-- Attachment #1: profiling-use-cond-calls.patch --]
[-- Type: text/plain, Size: 2035 bytes --]

Use conditional calls with lower d-cache hit in optimized version as a
condition for scheduler profiling call.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>

---
 kernel/profile.c |    4 ++++
 kernel/sched.c   |    4 +++-
 2 files changed, 7 insertions(+), 1 deletion(-)

Index: linux-2.6-lttng/kernel/profile.c
===================================================================
--- linux-2.6-lttng.orig/kernel/profile.c	2007-05-30 08:42:29.000000000 -0400
+++ linux-2.6-lttng/kernel/profile.c	2007-05-30 08:45:22.000000000 -0400
@@ -23,6 +23,7 @@
 #include <linux/profile.h>
 #include <linux/highmem.h>
 #include <linux/mutex.h>
+#include <linux/condcall.h>
 #include <asm/sections.h>
 #include <asm/semaphore.h>
 #include <asm/irq_regs.h>
@@ -92,6 +93,8 @@
 		printk(KERN_INFO "kernel profiling enabled (shift: %ld)\n",
 			prof_shift);
 	}
+	if (prof_on)
+		BUG_ON(cond_call_arm("profile_on"));
 	return 1;
 }
 __setup("profile=", profile_setup);
@@ -556,6 +559,7 @@
 	return 0;
 out_cleanup:
 	prof_on = 0;
+	cond_call_disarm("profile_on");
 	smp_mb();
 	on_each_cpu(profile_nop, NULL, 0, 1);
 	for_each_online_cpu(cpu) {
Index: linux-2.6-lttng/kernel/sched.c
===================================================================
--- linux-2.6-lttng.orig/kernel/sched.c	2007-05-30 08:43:02.000000000 -0400
+++ linux-2.6-lttng/kernel/sched.c	2007-05-30 08:45:22.000000000 -0400
@@ -59,6 +59,7 @@
 #include <linux/kprobes.h>
 #include <linux/delayacct.h>
 #include <linux/reciprocal_div.h>
+#include <linux/condcall.h>
 
 #include <asm/tlb.h>
 #include <asm/unistd.h>
@@ -2990,7 +2991,8 @@
 			print_irqtrace_events(prev);
 		dump_stack();
 	}
-	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
+	cond_call(profile_on,
+		profile_hit(SCHED_PROFILING, __builtin_return_address(0)));
 
 	/*
 	 * The idle thread is not allowed to schedule!

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 1/9] Conditional Calls - Architecture Independent Code
  2007-05-30 14:00 ` [patch 1/9] Conditional Calls - Architecture Independent Code Mathieu Desnoyers
@ 2007-05-30 20:32   ` Andrew Morton
  2007-05-31 16:34     ` Mathieu Desnoyers
  2007-05-31 13:47   ` Andi Kleen
  2007-06-04 19:01   ` Adrian Bunk
  2 siblings, 1 reply; 57+ messages in thread
From: Andrew Morton @ 2007-05-30 20:32 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: linux-kernel

On Wed, 30 May 2007 10:00:26 -0400
Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote:

> Conditional calls are used to compile in code that is meant to be dynamically
> enabled at runtime. When code is disabled, it has a very small footprint.
> 
> It has a generic cond_call version and optimized per architecture cond_calls.
> The optimized cond_call uses a load immediate to remove a data cache hit.
> 
> It adds a new rodata section "__cond_call" to place the pointers to the enable
> value.
> 
> cond_call activation functions sits in module.c.
> 

The above doesn't really describe what these things are, nor what they are
used for, nor how they actually work.  It's all a bit of a mystery.

The i386 implementation appears to be using self-modifying code, which is
intriguing.

<finds the documentation patch>

OK, that helps somewhat.

> ---
>  include/asm-generic/vmlinux.lds.h |   16 +-
>  include/linux/condcall.h          |   91 +++++++++++++
>  include/linux/module.h            |    4 
>  kernel/module.c                   |  248 ++++++++++++++++++++++++++++++++++++++
>  4 files changed, 356 insertions(+), 3 deletions(-)
> 
> Index: linux-2.6-lttng/include/linux/condcall.h
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6-lttng/include/linux/condcall.h	2007-05-17 02:13:53.000000000 -0400
> @@ -0,0 +1,91 @@
> +#ifndef _LINUX_CONDCALL_H
> +#define _LINUX_CONDCALL_H
> +
> +/*
> + * Conditional function calls. Cheap when disabled, enabled at runtime.
> + *
> + * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> + *
> + * This file is released under the GPLv2.
> + * See the file COPYING for more details.
> + */
> +
> +#ifdef __KERNEL__
> +
> +struct __cond_call_struct {
> +	const char *name;
> +	void *enable;
> +	int flags;
> +} __attribute__((packed));

Please document data structures lavishly.

> +
> +/* Cond call flags : selects the mechanism used to enable the conditional calls
> + * and prescribe what can be executed within their function. This is primarily
> + * used at reentrancy-unfriendly sites. */

You consistently use the wrong commenting style.  We prefer

/* Cond call flags : selects the mechanism used to enable the conditional calls
 * and prescribe what can be executed within their function. This is primarily
 * used at reentrancy-unfriendly sites.
 */

or

/*
 * Cond call flags : selects the mechanism used to enable the conditional calls
 * and prescribe what can be executed within their function. This is primarily
 * used at reentrancy-unfriendly sites.
 */

> +#ifdef CONFIG_COND_CALL_ENABLE_OPTIMIZATION
> +#include <asm/condcall.h>		/* optimized cond_call flavor */
> +#else
> +#include <asm-generic/condcall.h>	/* fallback on generic cond_call */
> +#endif

The preferred way to do this is to give every architecture an
asm/condcall.h and from within that, include asm-generic/condcall.h.  Your
[patch 3/9] does most of that, but it didn't remove the above ifdef, and I
don't think it removed the should-be-unneeded
CONFIG_COND_CALL_ENABLE_OPTIMIZATION either?

> +#define COND_CALL_MAX_FORMAT_LEN	1024
> +
> +extern int cond_call_arm(const char *name);
> +extern int cond_call_disarm(const char *name);
> +
> +/* cond_call_query : Returns 1 if enabled, 0 if disabled or not present */
> +extern int cond_call_query(const char *name);
> +extern int cond_call_list(const char *name);
> +
> +#endif /* __KERNEL__ */
> +#endif
> Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
> ===================================================================
> --- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h	2007-05-17 02:12:25.000000000 -0400
> +++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h	2007-05-17 02:13:42.000000000 -0400
> @@ -116,11 +116,22 @@
>  		*(__kcrctab_gpl_future)					\
>  		VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .;	\
>  	}								\
> -									\
> +	/* Conditional calls: pointers */				\
> +	__cond_call : AT(ADDR(__cond_call) - LOAD_OFFSET) {		\
> +		VMLINUX_SYMBOL(__start___cond_call) = .;		\
> +		*(__cond_call)						\
> +		VMLINUX_SYMBOL(__stop___cond_call) = .;			\
> +	}								\
>  	/* Kernel symbol table: strings */				\
>          __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {	\
>  		*(__ksymtab_strings)					\
> -	}								\
> + 	}								\
> +	/* Conditional calls: strings */				\
> +        __cond_call_strings : AT(ADDR(__cond_call_strings) - LOAD_OFFSET) { \

whitespace went bad here.



^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 2/9] Conditional Calls - Hash Table
  2007-05-30 14:00 ` [patch 2/9] Conditional Calls - Hash Table Mathieu Desnoyers
@ 2007-05-30 20:32   ` Andrew Morton
  2007-05-31 13:42   ` Andi Kleen
  1 sibling, 0 replies; 57+ messages in thread
From: Andrew Morton @ 2007-05-30 20:32 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: linux-kernel

On Wed, 30 May 2007 10:00:27 -0400
Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote:

> Reimplementation of the cond calls which uses a hash table to hold the active
> cond_calls. It permits to first arm a cond_call and then load supplementary
> modules that contain this cond_call.

This patch so completely mangles [patch 1/9] that I'd suggest they be
merged together?

> Without this, the order of loading a module containing a cond_call and arming a
> cond_call matters and there is no symbol dependency to check this.
> 
> At module load time, each cond_call checks if it is enabled in the hash table
> and is set accordingly.
> 

<again wonders what a cond_call is, and how it works>

> Index: linux-2.6-lttng/kernel/module.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/module.c	2007-05-17 01:42:50.000000000 -0400
> +++ linux-2.6-lttng/kernel/module.c	2007-05-17 01:46:42.000000000 -0400
> @@ -33,6 +33,8 @@
>  #include <linux/moduleparam.h>
>  #include <linux/errno.h>
>  #include <linux/condcall.h>
> +#include <linux/jhash.h>
> +#include <linux/list.h>
>  #include <linux/err.h>
>  #include <linux/vermagic.h>
>  #include <linux/notifier.h>
> @@ -71,6 +73,20 @@
>  
>  static BLOCKING_NOTIFIER_HEAD(module_notify_list);
>  
> +#ifdef CONFIG_COND_CALL
> +/* Conditional call hash table, containing the active cond_calls.
> + * Protected by module_mutex. */
> +#define COND_CALL_HASH_BITS 6
> +#define COND_CALL_TABLE_SIZE (1 << COND_CALL_HASH_BITS)
> +
> +struct cond_call_entry {
> +	struct hlist_node hlist;
> +	char name[0];
> +};
> +
> +static struct hlist_head cond_call_table[COND_CALL_TABLE_SIZE];
> +#endif //CONFIG_COND_CALL

No //'s, please.

> +/* Remove the cond_call from the hash table. Must be called with mutex_lock
> + * held. */
> +static void hash_remove_cond_call(const char *name)
> +{
> +	struct hlist_head *head;
> +	struct hlist_node *node;
> +	struct cond_call_entry *e;
> +	int found = 0;
> +	size_t len = strlen(name);
> +	u32 hash = jhash(name, len, 0);
> +
> +	head = &cond_call_table[hash & ((1 << COND_CALL_HASH_BITS)-1)];
> +	hlist_for_each_entry(e, node, head, hlist)
> +		if (!strcmp(name, e->name)) {
> +			found = 1;
> +			break;
> +		}

Layout looks funny.  I'd suggest that the extra { and } be added.

> +	if (found) {
> +		hlist_del(&e->hlist);
> +		kfree(e);
> +	}
> +}

>  /* Set the enable bit of the cond_call, choosing the generic or architecture
>   * specific functions depending on the cond_call's flags.
> @@ -317,59 +395,53 @@
>  		return cond_call_generic_set_enable(address, enable);
>  }
>  
> -/* Query the state of a cond_calls range. */
> -static int _cond_call_query_range(const char *name,
> -	const struct __cond_call_struct *begin,
> -	const struct __cond_call_struct *end)
> +static int cond_call_get_enable(void *address, int flags)
> +{
> +	if (flags & CF_OPTIMIZED)
> +		return COND_CALL_OPTIMIZED_ENABLE(address);
> +	else
> +		return COND_CALL_GENERIC_ENABLE(address);
> +}

I don't get this bit.  Does CF_OPTIMIZED indicate that we're running on an
arch which has the hand-optimised implementation?  If so, is it not the case
that _all_ cond_call sites will have CF_OPTIMIZED set?  And if so, why
would we need to perform this test at runtime?

(The preferred way of answering questions like this is via a suitable
comment in the next version of the patchset, btw.  So that others don't end
up wondering the same things).

> +static void module_cond_call_setup(struct module *mod)
> +{
> +}

I suppose that should have been inlined, although I expect the compiler
will do that anyway.



^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 5/9] Conditional Calls - i386 Optimization
  2007-05-30 14:00 ` [patch 5/9] Conditional Calls - i386 Optimization Mathieu Desnoyers
@ 2007-05-30 20:33   ` Andrew Morton
  2007-05-31 13:54   ` Andi Kleen
  1 sibling, 0 replies; 57+ messages in thread
From: Andrew Morton @ 2007-05-30 20:33 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: linux-kernel

On Wed, 30 May 2007 10:00:30 -0400
Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote:

> i386 optimization of the cond_calls which uses a movb with code patching to
> set/unset the value used to populate the register used for the branch test.
> 
> +/*
> + * Conditional function calls. i386 architecture optimizations.
> + *
> + * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> + *
> + * This file is released under the GPLv2.
> + * See the file COPYING for more details.
> + */
> +
> +
> +#ifdef CONFIG_COND_CALL
> +
> +#define CF_DEFAULT (CF_OPTIMIZED | CF_LOCKDEP | CF_PRINTK)
> +
> +/* Optimized version of the cond_call. Passing the flags as a pointer to
> + * the inline assembly to trick it into putting the flags value as third
> + * parameter in the structure. */
> +#define cond_call_optimized(flags, name, func) \
> +	({ \
> +		static const char __cstrtab_name_##name[] \
> +		__attribute__((section("__cond_call_strings"))) = #name; \
> +		char condition; \
> +		asm (	".section __cond_call, \"a\", @progbits;\n\t" \
> +					".long %1, 0f, %2;\n\t" \
> +					".previous;\n\t" \
> +					".align 2\n\t" \
> +					"0:\n\t" \
> +					"movb %3,%0;\n\t" \
> +				: "=r" (condition) \
> +				: "m" (*__cstrtab_name_##name), \
> +				  "m" (*(char*)flags), \
> +				  "i" ((flags) & CF_STATIC_ENABLE)); \
> +		(likely(!condition)) ? \
> +			(__typeof__(func))0 : \
> +			(func); \
> +	})

People often tab the \ characters across to the right, make them line up
nicely.  It does look better.

> +/* cond_call macro selecting the generic or optimized version of cond_call,
> + * depending on the flags specified. */
> +#define _cond_call(flags, name, func) \
> +({ \
> +	(((flags) & CF_LOCKDEP) && ((flags) & CF_OPTIMIZED)) ? \
> +		cond_call_optimized(flags, name, func) : \
> +		cond_call_generic(flags, name, func); \
> +})

So it is a macro instead of a static inline because we need to emit an
entry into __cond_call_strings once per callsite, yes?

> +/* Architecture dependant cond_call information, used internally for cond_call
> + * activation. */
> +
> +/* Offset of the immediate value from the start of the movb instruction, in
> + * bytes. */
> +#define COND_CALL_OPTIMIZED_ENABLE_IMMEDIATE_OFFSET 1
> +#define COND_CALL_OPTIMIZED_ENABLE_TYPE unsigned char
> +/* Dereference enable as lvalue from a pointer to its instruction */
> +#define COND_CALL_OPTIMIZED_ENABLE(a) \
> +	*(COND_CALL_OPTIMIZED_ENABLE_TYPE*) \
> +		((char*)a+COND_CALL_OPTIMIZED_ENABLE_IMMEDIATE_OFFSET)

I suspect this is underparenthesised.

> +extern int cond_call_optimized_set_enable(void *address, char enable);
> +
> +#endif
> +#endif //_ASM_I386_CONDCALL_H
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6-lttng/arch/i386/kernel/condcall.c	2007-05-17 01:52:38.000000000 -0400
> @@ -0,0 +1,140 @@
> +/* condcall.c
> + *
> + * - Erratum 49 fix for Intel PIII.
> + * - Still present on newer processors : Intel Core 2 Duo Processor for Intel
> + *   Centrino Duo Processor Technology Specification Update, AH33.
> + *   Unsynchronized Cross-Modifying Code Operations Can Cause Unexpected
> + *   Instruction Execution Results.
> + *
> + * Permits cond_call activation by XMC with correct serialization.
> + *
> + * Reentrant for NMI and trap handler instrumentation. Permits XMC to a
> + * location that has preemption enabled because it involves no temporary or
> + * reused data structure.
> + *
> + * Quoting Richard J Moore, source of the information motivating this
> + * implementation which differs from the one proposed by Intel which is not
> + * suitable for kernel context (does not support NMI and would require disabling
> + * interrupts on every CPU for a long period) :
> + *
> + * "There is another issue to consider when looking into using probes other
> + * then int3:
> + *
> + * Intel erratum 54 - Unsynchronized Cross-modifying code - refers to the
> + * practice of modifying code on one processor where another has prefetched
> + * the unmodified version of the code. Intel states that unpredictable general
> + * protection faults may result if a synchronizing instruction (iret, int,
> + * int3, cpuid, etc ) is not executed on the second processor before it
> + * executes the pre-fetched out-of-date copy of the instruction.
> + *
> + * When we became aware of this I had a long discussion with Intel's
> + * microarchitecture guys. It turns out that the reason for this erratum
> + * (which incidentally Intel does not intend to fix) is because the trace
> + * cache - the stream of micorops resulting from instruction interpretation -
> + * cannot guaranteed to be valid. Reading between the lines I assume this
> + * issue arises because of optimization done in the trace cache, where it is
> + * no longer possible to identify the original instruction boundaries. If the
> + * CPU discoverers that the trace cache has been invalidated because of
> + * unsynchronized cross-modification then instruction execution will be
> + * aborted with a GPF. Further discussion with Intel revealed that replacing
> + * the first opcode byte with an int3 would not be subject to this erratum.
> + *
> + * So, is cmpxchg reliable? One has to guarantee more than mere atomicity."
> + *
> + * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> + */
> +
> +#include <linux/notifier.h>
> +#include <linux/mutex.h>
> +#include <linux/preempt.h>
> +#include <linux/smp.h>
> +#include <linux/notifier.h>
> +#include <linux/module.h>
> +#include <linux/condcall.h>
> +#include <linux/kdebug.h>
> +
> +#include <asm/cacheflush.h>
> +
> +#define BREAKPOINT_INSTRUCTION  0xcc
> +#define BREAKPOINT_INS_LEN 1
> +
> +static DEFINE_MUTEX(cond_call_mutex);
> +static long target_eip = 0;

s/= 0//

> +static void cond_call_synchronize_core(void *info)
> +{
> +	sync_core();	/* use cpuid to stop speculative execution */
> +}
> +
> +/* The eip value points right after the breakpoint instruction,

What breakpoint insn?  How did it get there?

>  in the second
> + * byte of the movb. Incrementing it of 1 byte makes the code resume right after
> + * the movb instruction, effectively skipping this instruction.
> + *
> + * We simply skip the 2 bytes load immediate here, leaving the register in an
> + * undefined state. We don't care about the content (0 or !0), because we are
> + * changing the value 0->1 or 1->0. This small window of undefined value
> + * doesn't matter.
> + */
> +static int cond_call_notifier(struct notifier_block *nb,
> +	unsigned long val, void *data)
> +{
> +	enum die_val die_val = (enum die_val) val;
> +	struct die_args *args = (struct die_args *)data;

Unneeded cast of void*

> +	if (!args->regs || user_mode_vm(args->regs))
> +		return NOTIFY_DONE;
> +
> +	if (die_val == DIE_INT3	&& args->regs->eip == target_eip) {
> +		args->regs->eip += 1; /* Skip the next byte of load immediate */
> +		return NOTIFY_STOP;
> +	}
> +	return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block cond_call_notify = {
> +	.notifier_call = cond_call_notifier,
> +	.priority = 0x7fffffff,	/* we need to be notified first */
> +};
> +
> +int cond_call_optimized_set_enable(void *address, char enable)
> +{
> +	char saved_byte;
> +	int ret;
> +	char *dest = address;
> +
> +	mutex_lock(&cond_call_mutex);
> +
> +	if (!(enable ^ dest[1])) /* Must be a state change 0<->1 to execute */
> +		goto end;
> +
> +	target_eip = (long)address + BREAKPOINT_INS_LEN;
> +	/* register_die_notifier has memory barriers */
> +	register_die_notifier(&cond_call_notify);
> +	saved_byte = *dest;
> +	*dest = BREAKPOINT_INSTRUCTION;
> +	wmb();
> +	/* Execute serializing instruction on each CPU.
> +	 * Acts as a memory barrier. */
> +	ret = on_each_cpu(cond_call_synchronize_core, NULL, 1, 1);
> +	BUG_ON(ret != 0);
> +
> +	dest[1] = enable;
> +	wmb();
> +	*dest = saved_byte;
> +		/* Wait for all int3 handlers to end
> +		   (interrupts are disabled in int3).
> +		   This CPU is clearly not in a int3 handler,
> +		   because int3 handler is not preemptible and
> +		   there cannot be any more int3 handler called
> +		   for this site, because we placed the original
> +		   instruction back.
> +		   synchronize_sched has memory barriers */
> +	synchronize_sched();
> +	unregister_die_notifier(&cond_call_notify);
> +	/* unregister_die_notifier has memory barriers */
> +	target_eip = 0;
> +end:
> +	mutex_unlock(&cond_call_mutex);
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(cond_call_optimized_set_enable);

I'm not really able to review this material very usefully without having
seen an overall description of what it all does and how it all works.

What happens here when the text section is marked read-only, and when
CONFIG_DEBUG_PAGEALLOC is in use?


^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 8/9] F00F bug fixup for i386 - use conditional calls
  2007-05-30 14:00 ` [patch 8/9] F00F bug fixup for i386 - use conditional calls Mathieu Desnoyers
@ 2007-05-30 20:33   ` Andrew Morton
  2007-05-31 21:07     ` Mathieu Desnoyers
  0 siblings, 1 reply; 57+ messages in thread
From: Andrew Morton @ 2007-05-30 20:33 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: linux-kernel

On Wed, 30 May 2007 10:00:33 -0400
Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote:

> Use the faster conditional calls for F00F bug handling in do_page_fault.
> 

I guess this means that CONDCALL will be enabled on pretty much all i386,
in which case making the whole feature Kconfigurable is starting to look
marginal.

Perhaps a better approach would have to made this change dependent upon
CONDCALL, rather than forcing it on.

> @@ -1084,6 +1085,7 @@
>  	 */
>  	idt_descr.address = fix_to_virt(FIX_F00F_IDT);
>  	load_idt(&idt_descr);
> +	BUG_ON(cond_call_arm("fix_f00f"));

It is generally poor C style to do

	assert(something_which_has_side_effects())

because people can legitimately expect to do

#define assert() /*nothing*/

for production code.

The kernel doesn't actually do the right thing here when CONFIG_BUG=n,
sadly.  But still, children might be watching, so the better and preferred
style is

	if (cond_call_arm("fix_f00f"))
		BUG();

>  }
>  #endif
>  
> Index: linux-2.6-lttng/arch/i386/mm/fault.c
> ===================================================================
> --- linux-2.6-lttng.orig/arch/i386/mm/fault.c	2007-05-29 11:05:48.000000000 -0400
> +++ linux-2.6-lttng/arch/i386/mm/fault.c	2007-05-29 11:13:16.000000000 -0400
> @@ -25,6 +25,7 @@
>  #include <linux/kprobes.h>
>  #include <linux/uaccess.h>
>  #include <linux/kdebug.h>
> +#include <linux/condcall.h>
>  
>  #include <asm/system.h>
>  #include <asm/desc.h>
> @@ -221,6 +222,25 @@
>  
>  fastcall void do_invalid_op(struct pt_regs *, unsigned long);
>  
> +#ifdef CONFIG_X86_F00F_BUG
> +/*
> + * Pentium F0 0F C7 C8 bug workaround.
> + */
> +static inline int do_f00f_workaround(struct pt_regs *regs,
> +		unsigned long address)
> +{
> +	unsigned long nr;
> +
> +	nr = (address - idt_descr.address) >> 3;
> +
> +	if (nr == 6) {
> +		do_invalid_op(regs, 0);
> +		return 1;
> +	}
> +	return 0;
> +}
> +#endif
> +
>  static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
>  {
>  	unsigned index = pgd_index(address);
> @@ -474,19 +494,8 @@
>  	}
>  
>  #ifdef CONFIG_X86_F00F_BUG
> -	/*
> -	 * Pentium F0 0F C7 C8 bug workaround.
> -	 */
> -	if (boot_cpu_data.f00f_bug) {
> -		unsigned long nr;
> -		
> -		nr = (address - idt_descr.address) >> 3;
> -
> -		if (nr == 6) {
> -			do_invalid_op(regs, 0);
> -			return;
> -		}
> -	}
> +	if (cond_call(fix_f00f, do_f00f_workaround(regs, address)))
> +		return;

We do a cond_call() to an inlined function?  That's a bit weird, isn't it?

>  #endif
>  
>  no_context:
> Index: linux-2.6-lttng/arch/i386/Kconfig.cpu
> ===================================================================
> --- linux-2.6-lttng.orig/arch/i386/Kconfig.cpu	2007-05-29 11:51:46.000000000 -0400
> +++ linux-2.6-lttng/arch/i386/Kconfig.cpu	2007-05-29 11:52:08.000000000 -0400
> @@ -275,6 +275,7 @@
>  config X86_F00F_BUG
>  	bool
>  	depends on M586MMX || M586TSC || M586 || M486 || M386
> +	select COND_CALL

That hurts.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 9/9] Scheduler profiling - Use conditional calls
  2007-05-30 14:00 ` [patch 9/9] Scheduler profiling - Use " Mathieu Desnoyers
@ 2007-05-30 20:34   ` Andrew Morton
  2007-06-01 15:54     ` Mathieu Desnoyers
  2007-05-30 20:59   ` William Lee Irwin III
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 57+ messages in thread
From: Andrew Morton @ 2007-05-30 20:34 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: linux-kernel

On Wed, 30 May 2007 10:00:34 -0400
Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote:

> @@ -2990,7 +2991,8 @@
>  			print_irqtrace_events(prev);
>  		dump_stack();
>  	}
> -	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
> +	cond_call(profile_on,
> +		profile_hit(SCHED_PROFILING, __builtin_return_address(0)));
>  

That's looking pretty neat.  Do you have any before-and-after performance
figures for i386 and for a non-optimised architecture?

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 9/9] Scheduler profiling - Use conditional calls
  2007-05-30 14:00 ` [patch 9/9] Scheduler profiling - Use " Mathieu Desnoyers
  2007-05-30 20:34   ` Andrew Morton
@ 2007-05-30 20:59   ` William Lee Irwin III
  2007-05-31 21:12     ` Mathieu Desnoyers
  2007-05-30 21:44   ` Matt Mackall
  2007-05-31 13:39   ` Andi Kleen
  3 siblings, 1 reply; 57+ messages in thread
From: William Lee Irwin III @ 2007-05-30 20:59 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: akpm, linux-kernel

On Wed, May 30, 2007 at 10:00:34AM -0400, Mathieu Desnoyers wrote:
> Use conditional calls with lower d-cache hit in optimized version as a
> condition for scheduler profiling call.
[...]
> +	if (prof_on)
> +		BUG_ON(cond_call_arm("profile_on"));

What's the point of this BUG_ON()? The condition is a priori impossible.


On Wed, May 30, 2007 at 10:00:34AM -0400, Mathieu Desnoyers wrote:
> -	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
> +	cond_call(profile_on,
> +		profile_hit(SCHED_PROFILING, __builtin_return_address(0)));

It'd probably be prettier to stuff cond_call() inside a macro that does
something like this named profile_hit() with the old profile_hit()
renamed to __profile_hit() etc. Otherwise fine.


-- wli

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 9/9] Scheduler profiling - Use conditional calls
  2007-05-30 14:00 ` [patch 9/9] Scheduler profiling - Use " Mathieu Desnoyers
  2007-05-30 20:34   ` Andrew Morton
  2007-05-30 20:59   ` William Lee Irwin III
@ 2007-05-30 21:44   ` Matt Mackall
  2007-05-31 21:36     ` Mathieu Desnoyers
  2007-05-31 13:39   ` Andi Kleen
  3 siblings, 1 reply; 57+ messages in thread
From: Matt Mackall @ 2007-05-30 21:44 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: akpm, linux-kernel

On Wed, May 30, 2007 at 10:00:34AM -0400, Mathieu Desnoyers wrote:
> Use conditional calls with lower d-cache hit in optimized version as a
> condition for scheduler profiling call.
> 
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> 
> ---
>  kernel/profile.c |    4 ++++
>  kernel/sched.c   |    4 +++-
>  2 files changed, 7 insertions(+), 1 deletion(-)
> 
> Index: linux-2.6-lttng/kernel/profile.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/profile.c	2007-05-30 08:42:29.000000000 -0400
> +++ linux-2.6-lttng/kernel/profile.c	2007-05-30 08:45:22.000000000 -0400
> @@ -23,6 +23,7 @@
>  #include <linux/profile.h>
>  #include <linux/highmem.h>
>  #include <linux/mutex.h>
> +#include <linux/condcall.h>
>  #include <asm/sections.h>
>  #include <asm/semaphore.h>
>  #include <asm/irq_regs.h>
> @@ -92,6 +93,8 @@
>  		printk(KERN_INFO "kernel profiling enabled (shift: %ld)\n",
>  			prof_shift);
>  	}
> +	if (prof_on)
> +		BUG_ON(cond_call_arm("profile_on"));
>  	return 1;
>  }
>  __setup("profile=", profile_setup);
> @@ -556,6 +559,7 @@
>  	return 0;
>  out_cleanup:
>  	prof_on = 0;
> +	cond_call_disarm("profile_on");
>  	smp_mb();
>  	on_each_cpu(profile_nop, NULL, 0, 1);
>  	for_each_online_cpu(cpu) {
> Index: linux-2.6-lttng/kernel/sched.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/sched.c	2007-05-30 08:43:02.000000000 -0400
> +++ linux-2.6-lttng/kernel/sched.c	2007-05-30 08:45:22.000000000 -0400
> @@ -59,6 +59,7 @@
>  #include <linux/kprobes.h>
>  #include <linux/delayacct.h>
>  #include <linux/reciprocal_div.h>
> +#include <linux/condcall.h>
>  
>  #include <asm/tlb.h>
>  #include <asm/unistd.h>
> @@ -2990,7 +2991,8 @@
>  			print_irqtrace_events(prev);
>  		dump_stack();
>  	}
> -	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
> +	cond_call(profile_on,
> +		profile_hit(SCHED_PROFILING, __builtin_return_address(0)));

I think we could do better here pretty trivially. profile hit still
has an if (unlikely(prof_on == TYPE)). Shouldn't we just have a
cond_call type for "sched_profiling" and cond_call profile_hits(type,
ip, 1) directly?

-- 
Mathematics is the supreme nostalgia of our time.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 9/9] Scheduler profiling - Use conditional calls
  2007-05-30 14:00 ` [patch 9/9] Scheduler profiling - Use " Mathieu Desnoyers
                     ` (2 preceding siblings ...)
  2007-05-30 21:44   ` Matt Mackall
@ 2007-05-31 13:39   ` Andi Kleen
  2007-05-31 22:07     ` Mathieu Desnoyers
  3 siblings, 1 reply; 57+ messages in thread
From: Andi Kleen @ 2007-05-31 13:39 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: akpm, linux-kernel

Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:
>  	}
> -	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
> +	cond_call(profile_on,
> +		profile_hit(SCHED_PROFILING, __builtin_return_address(0)));

Would it be possible to use a syntax like

        if (unlikely_cond_call(variable)) {     (or better name) 
                ...
        } 

instead? I think that would be much nicer to read than having
code in a macro argument

-Andi

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 2/9] Conditional Calls - Hash Table
  2007-05-30 14:00 ` [patch 2/9] Conditional Calls - Hash Table Mathieu Desnoyers
  2007-05-30 20:32   ` Andrew Morton
@ 2007-05-31 13:42   ` Andi Kleen
  2007-06-01 16:08     ` Matt Mackall
  1 sibling, 1 reply; 57+ messages in thread
From: Andi Kleen @ 2007-05-31 13:42 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: akpm, linux-kernel

Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:

> Reimplementation of the cond calls which uses a hash table to hold the active
> cond_calls. It permits to first arm a cond_call and then load supplementary
> modules that contain this cond_call.

Hash table is probably overkill. This is a very very slow path operation.
Can you simplify the code? Just a linked list of all the condcall segments
should be enough  and then walk it.

-Andi

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 1/9] Conditional Calls - Architecture Independent Code
  2007-05-30 14:00 ` [patch 1/9] Conditional Calls - Architecture Independent Code Mathieu Desnoyers
  2007-05-30 20:32   ` Andrew Morton
@ 2007-05-31 13:47   ` Andi Kleen
  2007-06-05 18:40     ` Mathieu Desnoyers
  2007-06-04 19:01   ` Adrian Bunk
  2 siblings, 1 reply; 57+ messages in thread
From: Andi Kleen @ 2007-05-31 13:47 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: akpm, linux-kernel

Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:

> +struct __cond_call_struct {

Calling structs *_struct is severly deprecated and will cause some people
to make fun of your code.


> +	const char *name;
> +	void *enable;
> +	int flags;
> +} __attribute__((packed));

The packed doesn't seem to be needed. There will be padding at 
the end anyways because the next one needs to be aligned.

> +
> +
> +/* Cond call flags : selects the mechanism used to enable the conditional calls
> + * and prescribe what can be executed within their function. This is primarily
> + * used at reentrancy-unfriendly sites. */
> +#define CF_OPTIMIZED		(1 << 0) /* Use optimized cond_call */
> +#define CF_LOCKDEP		(1 << 1) /* Can call lockdep */
> +#define CF_PRINTK		(1 << 2) /* Probe can call vprintk */
> +#define CF_STATIC_ENABLE	(1 << 3) /* Enable cond_call statically */

Why is that all needed?  Condcall shouldn't really need to know anything
about all this. They're just a fancy conditional anyways -- and you don't
tell if() that it may need to printk.

Please consider eliminating.



> +#define _CF_NR			4


> +
> +#ifdef CONFIG_COND_CALL
> +
> +/* Generic cond_call flavor always available.
> + * Note : the empty asm volatile with read constraint is used here instead of a
> + * "used" attribute to fix a gcc 4.1.x bug. */

What gcc 4.1 bug? 

> +#define cond_call_generic(flags, name, func) \
> +	({ \
> +		static const char __cstrtab_name_##name[] \
> +		__attribute__((section("__cond_call_strings"))) = #name; \
> +		static char __cond_call_enable_##name = \
> +			(flags) & CF_STATIC_ENABLE; \
> +		static const struct __cond_call_struct __cond_call_##name \
> +			__attribute__((section("__cond_call"))) = \
> +			{ __cstrtab_name_##name, \
> +			&__cond_call_enable_##name, \
> +			(flags) & ~CF_OPTIMIZED } ; \
> +		asm volatile ( "" : : "i" (&__cond_call_##name)); \
> +		(unlikely(__cond_call_enable_##name)) ? \
> +			(func) : \
> +			(__typeof__(func))0; \
> +	})

-Andi

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 5/9] Conditional Calls - i386 Optimization
  2007-05-30 14:00 ` [patch 5/9] Conditional Calls - i386 Optimization Mathieu Desnoyers
  2007-05-30 20:33   ` Andrew Morton
@ 2007-05-31 13:54   ` Andi Kleen
  2007-06-05 19:02     ` Mathieu Desnoyers
  1 sibling, 1 reply; 57+ messages in thread
From: Andi Kleen @ 2007-05-31 13:54 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: akpm, linux-kernel

Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:

> +#define cond_call_optimized(flags, name, func) \
> +	({ \
> +		static const char __cstrtab_name_##name[] \
> +		__attribute__((section("__cond_call_strings"))) = #name; \
> +		char condition; \
> +		asm (	".section __cond_call, \"a\", @progbits;\n\t" \
> +					".long %1, 0f, %2;\n\t" \
> +					".previous;\n\t" \
> +					".align 2\n\t" \
> +					"0:\n\t" \
> +					"movb %3,%0;\n\t" \
> +				: "=r" (condition) \
> +				: "m" (*__cstrtab_name_##name), \
> +				  "m" (*(char*)flags), \
> +				  "i" ((flags) & CF_STATIC_ENABLE)); \

Remind me what we need the flags again for? 

I would prefer to just eliminate them and always require arming.


> +		(likely(!condition)) ? \
> +			(__typeof__(func))0 : \
> +			(func); \
> +	})
> +
> +/* cond_call macro selecting the generic or optimized version of cond_call,
> + * depending on the flags specified. */
> +#define _cond_call(flags, name, func) \
> +({ \
> +	(((flags) & CF_LOCKDEP) && ((flags) & CF_OPTIMIZED)) ? \

Similar here? unoptimized condcalls don't make much sense.
I also don't understand what the LOCKDEP flag is good for.

> Index: linux-2.6-lttng/arch/i386/kernel/condcall.c
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6-lttng/arch/i386/kernel/condcall.c	2007-05-17 01:52:38.000000000 -0400
> @@ -0,0 +1,140 @@
> +/* condcall.c
> + *
> + * - Erratum 49 fix for Intel PIII.
> + * - Still present on newer processors : Intel Core 2 Duo Processor for Intel
> + *   Centrino Duo Processor Technology Specification Update, AH33.
> + *   Unsynchronized Cross-Modifying Code Operations Can Cause Unexpected
> + *   Instruction Execution Results.

Can you please define a generic utility function to do self modifying
code? There are other places that do it too.

> +static DEFINE_MUTEX(cond_call_mutex);

All locks need a comment describing what they protect and the hierarchy if 
any.

-Andi

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 1/9] Conditional Calls - Architecture Independent Code
  2007-05-30 20:32   ` Andrew Morton
@ 2007-05-31 16:34     ` Mathieu Desnoyers
  0 siblings, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-31 16:34 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

* Andrew Morton (akpm@linux-foundation.org) wrote:
> > +#ifdef CONFIG_COND_CALL_ENABLE_OPTIMIZATION
> > +#include <asm/condcall.h>		/* optimized cond_call flavor */
> > +#else
> > +#include <asm-generic/condcall.h>	/* fallback on generic cond_call */
> > +#endif
> 
> The preferred way to do this is to give every architecture an
> asm/condcall.h and from within that, include asm-generic/condcall.h.  Your
> [patch 3/9] does most of that, but it didn't remove the above ifdef, and I
> don't think it removed the should-be-unneeded
> CONFIG_COND_CALL_ENABLE_OPTIMIZATION either?
> 

Conditional calls work just like the previous markers in this aspect:
in order to support embedded systems with read-only memory for the text
segment, I leave the choice to disable the "optimized" cond_call as a
config option even if the architecture has the optimized marker flavor.

I use this include scheme because I want to support both the generic and
optimized version at the same time : if a _cond_call is declared with
the CF_OPTIMIZED flags unset, it will use the generic version. This is
useful when we must place cond_calls in locations that present specific
reentrancy issues, such as cond_call, printk, some trap handlers. The
optimized version, when it uses the i386 mechanism to ensure correct
code modification, can trigger a trap, which will call into lockdep and
might have other side-effects.

(I am adding this text in the condcall.h header)

Mathieu


-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 8/9] F00F bug fixup for i386 - use conditional calls
  2007-05-30 20:33   ` Andrew Morton
@ 2007-05-31 21:07     ` Mathieu Desnoyers
  2007-05-31 21:21       ` Andrew Morton
  0 siblings, 1 reply; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-31 21:07 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

* Andrew Morton (akpm@linux-foundation.org) wrote:
 
> > Use the faster conditional calls for F00F bug handling in do_page_fault.
> > 
> 
> I guess this means that CONDCALL will be enabled on pretty much all i386,
> in which case making the whole feature Kconfigurable is starting to look
> marginal.
> 
> Perhaps a better approach would have to made this change dependent upon
> CONDCALL, rather than forcing it on.
> 

Do you mean making X86_F00F_BUG depend on COND_CALL instead of selecting
it ?

> > +	if (cond_call(fix_f00f, do_f00f_workaround(regs, address)))
> > +		return;
> 
> We do a cond_call() to an inlined function?  That's a bit weird, isn't it?
> 

Yes, but it works :) I will add this information to the documentation.

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 9/9] Scheduler profiling - Use conditional calls
  2007-05-30 20:59   ` William Lee Irwin III
@ 2007-05-31 21:12     ` Mathieu Desnoyers
  2007-05-31 23:41       ` William Lee Irwin III
  0 siblings, 1 reply; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-31 21:12 UTC (permalink / raw)
  To: William Lee Irwin III; +Cc: akpm, linux-kernel

* William Lee Irwin III (wli@holomorphy.com) wrote:
> On Wed, May 30, 2007 at 10:00:34AM -0400, Mathieu Desnoyers wrote:
> > Use conditional calls with lower d-cache hit in optimized version as a
> > condition for scheduler profiling call.
> [...]
> > +	if (prof_on)
> > +		BUG_ON(cond_call_arm("profile_on"));
> 
> What's the point of this BUG_ON()? The condition is a priori impossible.
> 

Not impossible: hash_add_cond_call() can return -ENOMEM if kmalloc lacks
memory.

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 8/9] F00F bug fixup for i386 - use conditional calls
  2007-05-31 21:07     ` Mathieu Desnoyers
@ 2007-05-31 21:21       ` Andrew Morton
  2007-05-31 21:38         ` Mathieu Desnoyers
  0 siblings, 1 reply; 57+ messages in thread
From: Andrew Morton @ 2007-05-31 21:21 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: linux-kernel

On Thu, 31 May 2007 17:07:55 -0400
Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote:

> * Andrew Morton (akpm@linux-foundation.org) wrote:
>  
> > > Use the faster conditional calls for F00F bug handling in do_page_fault.
> > > 
> > 
> > I guess this means that CONDCALL will be enabled on pretty much all i386,
> > in which case making the whole feature Kconfigurable is starting to look
> > marginal.
> > 
> > Perhaps a better approach would have to made this change dependent upon
> > CONDCALL, rather than forcing it on.
> > 
> 
> Do you mean making X86_F00F_BUG depend on COND_CALL instead of selecting
> it ?

yup

> > > +	if (cond_call(fix_f00f, do_f00f_workaround(regs, address)))
> > > +		return;
> > 
> > We do a cond_call() to an inlined function?  That's a bit weird, isn't it?
> > 
> 
> Yes, but it works :) I will add this information to the documentation.

But why does it work?  Did the compiler generate an out-of-line copy
of the function?  If so, we'll end up with multiple copies of the function if
there are other callers.  If not, the `inline' was pointless.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 9/9] Scheduler profiling - Use conditional calls
  2007-05-30 21:44   ` Matt Mackall
@ 2007-05-31 21:36     ` Mathieu Desnoyers
  0 siblings, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-31 21:36 UTC (permalink / raw)
  To: Matt Mackall; +Cc: akpm, linux-kernel

* Matt Mackall (mpm@selenic.com) wrote:
> > -	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
> > +	cond_call(profile_on,
> > +		profile_hit(SCHED_PROFILING, __builtin_return_address(0)));
> 
> I think we could do better here pretty trivially. profile hit still
> has an if (unlikely(prof_on == TYPE)). Shouldn't we just have a
> cond_call type for "sched_profiling" and cond_call profile_hits(type,
> ip, 1) directly?
> 

Yes, that's much better. I will leave the profile_hit as a simple call
to profile_hits with one parameter for now, although it might be a
little bit useless. Being able to select independently which specific
site must be enabled is interesting too, therefore I will use the more
precise "sched_profiling" to replace "profile_on".

I guess we could also rework profile.c:profile_tick() in the same way,
using "cpu_profiling". It would be interesting to wrap the whole
profile_tick() call in a cond_call, but it would involve dealing with
more than one different things that are part of this function and can
have to run. The same will apply to kvm and sleep profiling.

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 8/9] F00F bug fixup for i386 - use conditional calls
  2007-05-31 21:21       ` Andrew Morton
@ 2007-05-31 21:38         ` Mathieu Desnoyers
  0 siblings, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-31 21:38 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

* Andrew Morton (akpm@linux-foundation.org) wrote:
> On Thu, 31 May 2007 17:07:55 -0400
> Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote:
> 
> > * Andrew Morton (akpm@linux-foundation.org) wrote:
> >  
> > > > Use the faster conditional calls for F00F bug handling in do_page_fault.
> > > > 
> > > 
> > > I guess this means that CONDCALL will be enabled on pretty much all i386,
> > > in which case making the whole feature Kconfigurable is starting to look
> > > marginal.
> > > 
> > > Perhaps a better approach would have to made this change dependent upon
> > > CONDCALL, rather than forcing it on.
> > > 
> > 
> > Do you mean making X86_F00F_BUG depend on COND_CALL instead of selecting
> > it ?
> 
> yup
> 
> > > > +	if (cond_call(fix_f00f, do_f00f_workaround(regs, address)))
> > > > +		return;
> > > 
> > > We do a cond_call() to an inlined function?  That's a bit weird, isn't it?
> > > 
> > 
> > Yes, but it works :) I will add this information to the documentation.
> 
> But why does it work?  Did the compiler generate an out-of-line copy
> of the function?  If so, we'll end up with multiple copies of the function if
> there are other callers.  If not, the `inline' was pointless.

Yes, it's doing an out-of-line copy. Just like the compiler would have
done if we placed a call to an inline function within a if() { } block.

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 9/9] Scheduler profiling - Use conditional calls
  2007-05-31 13:39   ` Andi Kleen
@ 2007-05-31 22:07     ` Mathieu Desnoyers
  2007-05-31 22:33       ` Andi Kleen
  0 siblings, 1 reply; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-31 22:07 UTC (permalink / raw)
  To: Andi Kleen; +Cc: akpm, linux-kernel

* Andi Kleen (andi@firstfloor.org) wrote:
> Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:
> >  	}
> > -	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
> > +	cond_call(profile_on,
> > +		profile_hit(SCHED_PROFILING, __builtin_return_address(0)));
> 
> Would it be possible to use a syntax like
> 
>         if (unlikely_cond_call(variable)) {     (or better name) 
>                 ...
>         } 
> 
> instead? I think that would be much nicer to read than having
> code in a macro argument
> 

I see your point, but there is a level of control on the branch I would
lack by doing so: the ability to put the call in either the if or else
branch. It is an optimization on i386.

I could do it by defining my home-made if() :

cond_if (cond_call_name) {
  code
}

The macro cond_if could then expand (this is a simplified example) in either in

if (cond)

or

if (cond)
else

Also, I live in the expectation that, someday, the gcc guys will be nice
enough to add some kind of support for a nop-based jump that would
require code patching to put a jump instead. If it ever happens, my
macro could evolve into this for newer compiler versions, which I could
not do with the if() statement you are proposing.


-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 9/9] Scheduler profiling - Use conditional calls
  2007-05-31 22:07     ` Mathieu Desnoyers
@ 2007-05-31 22:33       ` Andi Kleen
  2007-06-04 22:20         ` Mathieu Desnoyers
  0 siblings, 1 reply; 57+ messages in thread
From: Andi Kleen @ 2007-05-31 22:33 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: Andi Kleen, akpm, linux-kernel

> I see your point, but there is a level of control on the branch I would
> lack by doing so: the ability to put the call in either the if or else
> branch. It is an optimization on i386.

What does it optimize exactly?

> Also, I live in the expectation that, someday, the gcc guys will be nice
> enough to add some kind of support for a nop-based jump that would
> require code patching to put a jump instead. If it ever happens, my
> macro could evolve into this for newer compiler versions, which I could
> not do with the if() statement you are proposing.

If that ever happens we couldn't use it anyways because Linux still
has to support old compilers for a long time. And when those are dropped the
code could be updated.

-Andi

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 9/9] Scheduler profiling - Use conditional calls
  2007-05-31 21:12     ` Mathieu Desnoyers
@ 2007-05-31 23:41       ` William Lee Irwin III
  2007-06-04 22:20         ` Mathieu Desnoyers
  0 siblings, 1 reply; 57+ messages in thread
From: William Lee Irwin III @ 2007-05-31 23:41 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: akpm, linux-kernel

On Wed, May 30, 2007 at 10:00:34AM -0400, Mathieu Desnoyers wrote:
>>> +	if (prof_on)
>>> +		BUG_ON(cond_call_arm("profile_on"));

* William Lee Irwin III (wli@holomorphy.com) wrote:
>> What's the point of this BUG_ON()? The condition is a priori impossible.

On Thu, May 31, 2007 at 05:12:58PM -0400, Mathieu Desnoyers wrote:
> Not impossible: hash_add_cond_call() can return -ENOMEM if kmalloc lacks
> memory.

Shouldn't it just propagate the errors like anything else instead of
going BUG(), then? One can easily live without profiling if the profile
buffers should fail to be allocated e.g. due to memory fragmentation.

These things all have to handle errors for hotplugging anyway AIUI.


-- wli

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 9/9] Scheduler profiling - Use conditional calls
  2007-05-30 20:34   ` Andrew Morton
@ 2007-06-01 15:54     ` Mathieu Desnoyers
  2007-06-01 16:19       ` Andrew Morton
  0 siblings, 1 reply; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-06-01 15:54 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

* Andrew Morton (akpm@linux-foundation.org) wrote:
> On Wed, 30 May 2007 10:00:34 -0400
> Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote:
> 
> > @@ -2990,7 +2991,8 @@
> >  			print_irqtrace_events(prev);
> >  		dump_stack();
> >  	}
> > -	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
> > +	cond_call(profile_on,
> > +		profile_hit(SCHED_PROFILING, __builtin_return_address(0)));
> >  
> 
> That's looking pretty neat.  Do you have any before-and-after performance
> figures for i386 and for a non-optimised architecture?

Sure, here is the result of a small test comparing:
1 - Branch depending on a cache miss (has to fetch in memory, caused by a 128
    bytes stride)). This is the test that is likely to look like what
    side-effect the original profile_hit code was causing, under the
    assumption that the kernel is already using L1 and L2 caches at
    their full capacity and that a supplementary data load would cause
    cache trashing.
2 - Branch depending on L1 cache hit. Just for comparison.
3 - Branch depending on a load immediate in the instruction stream.

It has been compiled with gcc -O2. Tests done on a 3GHz P4.

In the first test series, the branch is not taken:

number of tests : 1000
number of branches per test : 81920
memory hit cycles per iteration (mean) : 48.252
L1 cache hit cycles per iteration (mean) : 16.1693
instruction stream based test, cycles per iteration (mean) : 16.0432


In the second test series, the branch is taken and an integer is
incremented within the block:

number of tests : 1000
number of branches per test : 81920
memory hit cycles per iteration (mean) : 48.2691
L1 cache hit cycles per iteration (mean) : 16.396
instruction stream based test, cycles per iteration (mean) : 16.0441

Therefore, the memory fetch based test seems to be 200% slower than the
load immediate based test.

(I am adding these results to the documentation)

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 2/9] Conditional Calls - Hash Table
  2007-05-31 13:42   ` Andi Kleen
@ 2007-06-01 16:08     ` Matt Mackall
  2007-06-01 16:46       ` Mathieu Desnoyers
  0 siblings, 1 reply; 57+ messages in thread
From: Matt Mackall @ 2007-06-01 16:08 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Mathieu Desnoyers, akpm, linux-kernel

On Thu, May 31, 2007 at 03:42:50PM +0200, Andi Kleen wrote:
> Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:
> 
> > Reimplementation of the cond calls which uses a hash table to hold the active
> > cond_calls. It permits to first arm a cond_call and then load supplementary
> > modules that contain this cond_call.
> 
> Hash table is probably overkill. This is a very very slow path operation.
> Can you simplify the code? Just a linked list of all the condcall segments
> should be enough  and then walk it.

I think it could be greatly simplified by using symbols instead of
strings.

That is, doing cond_call(foo, func()) rather than cond_call("foo",
func()). Here foo is a structure or type holding the relevant info to
deal with the cond_call infrastructure. For unoptimized architectures,
it can simply be a bool, which will be faster.

This has the added advantage that the compiler will automatically pick
up any misspellings of these things. And it saves the space we'd use
on the hash table too.

-- 
Mathematics is the supreme nostalgia of our time.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 9/9] Scheduler profiling - Use conditional calls
  2007-06-01 15:54     ` Mathieu Desnoyers
@ 2007-06-01 16:19       ` Andrew Morton
  2007-06-01 16:33         ` Mathieu Desnoyers
  0 siblings, 1 reply; 57+ messages in thread
From: Andrew Morton @ 2007-06-01 16:19 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: linux-kernel

On Fri, 1 Jun 2007 11:54:13 -0400 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote:

> * Andrew Morton (akpm@linux-foundation.org) wrote:
> > On Wed, 30 May 2007 10:00:34 -0400
> > Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote:
> > 
> > > @@ -2990,7 +2991,8 @@
> > >  			print_irqtrace_events(prev);
> > >  		dump_stack();
> > >  	}
> > > -	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
> > > +	cond_call(profile_on,
> > > +		profile_hit(SCHED_PROFILING, __builtin_return_address(0)));
> > >  
> > 
> > That's looking pretty neat.  Do you have any before-and-after performance
> > figures for i386 and for a non-optimised architecture?
> 
> Sure, here is the result of a small test comparing:
> 1 - Branch depending on a cache miss (has to fetch in memory, caused by a 128
>     bytes stride)). This is the test that is likely to look like what
>     side-effect the original profile_hit code was causing, under the
>     assumption that the kernel is already using L1 and L2 caches at
>     their full capacity and that a supplementary data load would cause
>     cache trashing.
> 2 - Branch depending on L1 cache hit. Just for comparison.
> 3 - Branch depending on a load immediate in the instruction stream.
> 
> It has been compiled with gcc -O2. Tests done on a 3GHz P4.
> 
> In the first test series, the branch is not taken:
> 
> number of tests : 1000
> number of branches per test : 81920
> memory hit cycles per iteration (mean) : 48.252
> L1 cache hit cycles per iteration (mean) : 16.1693
> instruction stream based test, cycles per iteration (mean) : 16.0432
> 
> 
> In the second test series, the branch is taken and an integer is
> incremented within the block:
> 
> number of tests : 1000
> number of branches per test : 81920
> memory hit cycles per iteration (mean) : 48.2691
> L1 cache hit cycles per iteration (mean) : 16.396
> instruction stream based test, cycles per iteration (mean) : 16.0441
> 
> Therefore, the memory fetch based test seems to be 200% slower than the
> load immediate based test.

Confused.  From what did you calculate that 200%?

> (I am adding these results to the documentation)

Good, thanks.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 9/9] Scheduler profiling - Use conditional calls
  2007-06-01 16:19       ` Andrew Morton
@ 2007-06-01 16:33         ` Mathieu Desnoyers
  0 siblings, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-06-01 16:33 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

* Andrew Morton (akpm@linux-foundation.org) wrote:
> On Fri, 1 Jun 2007 11:54:13 -0400 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote:
> 
> > * Andrew Morton (akpm@linux-foundation.org) wrote:
> > > On Wed, 30 May 2007 10:00:34 -0400
> > > Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote:
> > > 
> > > > @@ -2990,7 +2991,8 @@
> > > >  			print_irqtrace_events(prev);
> > > >  		dump_stack();
> > > >  	}
> > > > -	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
> > > > +	cond_call(profile_on,
> > > > +		profile_hit(SCHED_PROFILING, __builtin_return_address(0)));
> > > >  
> > > 
> > > That's looking pretty neat.  Do you have any before-and-after performance
> > > figures for i386 and for a non-optimised architecture?
> > 
> > Sure, here is the result of a small test comparing:
> > 1 - Branch depending on a cache miss (has to fetch in memory, caused by a 128
> >     bytes stride)). This is the test that is likely to look like what
> >     side-effect the original profile_hit code was causing, under the
> >     assumption that the kernel is already using L1 and L2 caches at
> >     their full capacity and that a supplementary data load would cause
> >     cache trashing.
> > 2 - Branch depending on L1 cache hit. Just for comparison.
> > 3 - Branch depending on a load immediate in the instruction stream.
> > 
> > It has been compiled with gcc -O2. Tests done on a 3GHz P4.
> > 
> > In the first test series, the branch is not taken:
> > 
> > number of tests : 1000
> > number of branches per test : 81920
> > memory hit cycles per iteration (mean) : 48.252
> > L1 cache hit cycles per iteration (mean) : 16.1693
> > instruction stream based test, cycles per iteration (mean) : 16.0432
> > 
> > 
> > In the second test series, the branch is taken and an integer is
> > incremented within the block:
> > 
> > number of tests : 1000
> > number of branches per test : 81920
> > memory hit cycles per iteration (mean) : 48.2691
> > L1 cache hit cycles per iteration (mean) : 16.396
> > instruction stream based test, cycles per iteration (mean) : 16.0441
> > 
> > Therefore, the memory fetch based test seems to be 200% slower than the
> > load immediate based test.
> 
> Confused.  From what did you calculate that 200%?
> 
> > (I am adding these results to the documentation)
> 
> Good, thanks.


(48.2691-16.0441)/16.0441 = 2.00

Which means that it is 200% slower to run this test while fetching the
branch condition from main memory rather than using the load immediate.

We could also put it like this : the speedup of the load immediate over
the memory fetch is 3.

48.2691/16.0441 = 3.00

Is there a preferred way to present these results in the documentation ?

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 2/9] Conditional Calls - Hash Table
  2007-06-01 16:08     ` Matt Mackall
@ 2007-06-01 16:46       ` Mathieu Desnoyers
  2007-06-01 17:07         ` Matt Mackall
  0 siblings, 1 reply; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-06-01 16:46 UTC (permalink / raw)
  To: Matt Mackall; +Cc: Andi Kleen, akpm, linux-kernel

* Matt Mackall (mpm@selenic.com) wrote:
> On Thu, May 31, 2007 at 03:42:50PM +0200, Andi Kleen wrote:
> > Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:
> > 
> > > Reimplementation of the cond calls which uses a hash table to hold the active
> > > cond_calls. It permits to first arm a cond_call and then load supplementary
> > > modules that contain this cond_call.
> > 
> > Hash table is probably overkill. This is a very very slow path operation.
> > Can you simplify the code? Just a linked list of all the condcall segments
> > should be enough  and then walk it.
> 
> I think it could be greatly simplified by using symbols instead of
> strings.
> 
> That is, doing cond_call(foo, func()) rather than cond_call("foo",
> func()). Here foo is a structure or type holding the relevant info to
> deal with the cond_call infrastructure. For unoptimized architectures,
> it can simply be a bool, which will be faster.
> 
> This has the added advantage that the compiler will automatically pick
> up any misspellings of these things. And it saves the space we'd use
> on the hash table too.
> 

The idea is interesting, but does not fit the problem: AFAIK, it will
not be possible to do multiple declarations of the same symbol, which is
needed whenever we want to declare a cond_call() more than once or to
embed it in an inline function.

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 2/9] Conditional Calls - Hash Table
  2007-06-01 16:46       ` Mathieu Desnoyers
@ 2007-06-01 17:07         ` Matt Mackall
  2007-06-01 17:45           ` Andi Kleen
  2007-06-01 18:03           ` Mathieu Desnoyers
  0 siblings, 2 replies; 57+ messages in thread
From: Matt Mackall @ 2007-06-01 17:07 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: Andi Kleen, akpm, linux-kernel

On Fri, Jun 01, 2007 at 12:46:23PM -0400, Mathieu Desnoyers wrote:
> * Matt Mackall (mpm@selenic.com) wrote:
> > On Thu, May 31, 2007 at 03:42:50PM +0200, Andi Kleen wrote:
> > > Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:
> > > 
> > > > Reimplementation of the cond calls which uses a hash table to hold the active
> > > > cond_calls. It permits to first arm a cond_call and then load supplementary
> > > > modules that contain this cond_call.
> > > 
> > > Hash table is probably overkill. This is a very very slow path operation.
> > > Can you simplify the code? Just a linked list of all the condcall segments
> > > should be enough  and then walk it.
> > 
> > I think it could be greatly simplified by using symbols instead of
> > strings.
> > 
> > That is, doing cond_call(foo, func()) rather than cond_call("foo",
> > func()). Here foo is a structure or type holding the relevant info to
> > deal with the cond_call infrastructure. For unoptimized architectures,
> > it can simply be a bool, which will be faster.
> > 
> > This has the added advantage that the compiler will automatically pick
> > up any misspellings of these things. And it saves the space we'd use
> > on the hash table too.
> > 
> 
> The idea is interesting, but does not fit the problem: AFAIK, it will
> not be possible to do multiple declarations of the same symbol, which is
> needed whenever we want to declare a cond_call() more than once or to
> embed it in an inline function.

It's not clear to me why either of those things are necessary. An
example please?

-- 
Mathematics is the supreme nostalgia of our time.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 2/9] Conditional Calls - Hash Table
  2007-06-01 17:07         ` Matt Mackall
@ 2007-06-01 17:45           ` Andi Kleen
  2007-06-01 18:06             ` Mathieu Desnoyers
  2007-06-01 18:03           ` Mathieu Desnoyers
  1 sibling, 1 reply; 57+ messages in thread
From: Andi Kleen @ 2007-06-01 17:45 UTC (permalink / raw)
  To: Matt Mackall; +Cc: Mathieu Desnoyers, Andi Kleen, akpm, linux-kernel

> It's not clear to me why either of those things are necessary. An
> example please?

It's certainly possible that a global flag would need to be tested
more than once.

I guess it would work if a symbol is associated with a single
definition. e.g. if there is a DEFINE_COND_CALL() somewhere
and the individual cond calls reference it.

-Andi

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 2/9] Conditional Calls - Hash Table
  2007-06-01 17:07         ` Matt Mackall
  2007-06-01 17:45           ` Andi Kleen
@ 2007-06-01 18:03           ` Mathieu Desnoyers
  1 sibling, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-06-01 18:03 UTC (permalink / raw)
  To: Matt Mackall; +Cc: Andi Kleen, akpm, linux-kernel

* Matt Mackall (mpm@selenic.com) wrote:
> On Fri, Jun 01, 2007 at 12:46:23PM -0400, Mathieu Desnoyers wrote:
> > * Matt Mackall (mpm@selenic.com) wrote:
> > > On Thu, May 31, 2007 at 03:42:50PM +0200, Andi Kleen wrote:
> > > > Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:
> > > > 
> > > > > Reimplementation of the cond calls which uses a hash table to hold the active
> > > > > cond_calls. It permits to first arm a cond_call and then load supplementary
> > > > > modules that contain this cond_call.
> > > > 
> > > > Hash table is probably overkill. This is a very very slow path operation.
> > > > Can you simplify the code? Just a linked list of all the condcall segments
> > > > should be enough  and then walk it.
> > > 
> > > I think it could be greatly simplified by using symbols instead of
> > > strings.
> > > 
> > > That is, doing cond_call(foo, func()) rather than cond_call("foo",
> > > func()). Here foo is a structure or type holding the relevant info to
> > > deal with the cond_call infrastructure. For unoptimized architectures,
> > > it can simply be a bool, which will be faster.
> > > 
> > > This has the added advantage that the compiler will automatically pick
> > > up any misspellings of these things. And it saves the space we'd use
> > > on the hash table too.
> > > 
> > 
> > The idea is interesting, but does not fit the problem: AFAIK, it will
> > not be possible to do multiple declarations of the same symbol, which is
> > needed whenever we want to declare a cond_call() more than once or to
> > embed it in an inline function.
> 
> It's not clear to me why either of those things are necessary. An
> example please?
> 

Case where we want to declare the same cond_call multiple times :

function_a(int var)
{
  ...
  cond_call(profile_on, profile_hit(...));
  ...
}


function_b(int var, int var2)
{
  ...
  cond_call(profile_on, profile_hit(...));
  ...
}


Case in inline function :

static inline myinlinefct()
{
  ...
  cond_call(profile_on, profile_hit(...));
  ...
}


somefct()
{
  ...
  myinlinefct();
  ...
  myinlinefct();
  ...
}

Those will result in multiple declarations of the cond_call.


-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 2/9] Conditional Calls - Hash Table
  2007-06-01 17:45           ` Andi Kleen
@ 2007-06-01 18:06             ` Mathieu Desnoyers
  2007-06-01 18:49               ` Matt Mackall
  2007-06-01 19:35               ` Andi Kleen
  0 siblings, 2 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-06-01 18:06 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Matt Mackall, akpm, linux-kernel

* Andi Kleen (andi@firstfloor.org) wrote:
> > It's not clear to me why either of those things are necessary. An
> > example please?
> 
> It's certainly possible that a global flag would need to be tested
> more than once.
> 
> I guess it would work if a symbol is associated with a single
> definition. e.g. if there is a DEFINE_COND_CALL() somewhere
> and the individual cond calls reference it.
> 

Yes, but as you have probably understood, I want to have everything
embedded at the cond_call() site rather than polluting the rest of the
code with declarations.

Also, if we have the same cond_calls in different modules, in which
module shall it be defined ?

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 2/9] Conditional Calls - Hash Table
  2007-06-01 18:06             ` Mathieu Desnoyers
@ 2007-06-01 18:49               ` Matt Mackall
  2007-06-01 19:35               ` Andi Kleen
  1 sibling, 0 replies; 57+ messages in thread
From: Matt Mackall @ 2007-06-01 18:49 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: Andi Kleen, akpm, linux-kernel

On Fri, Jun 01, 2007 at 02:06:54PM -0400, Mathieu Desnoyers wrote:
> * Andi Kleen (andi@firstfloor.org) wrote:
> > > It's not clear to me why either of those things are necessary. An
> > > example please?
> > 
> > It's certainly possible that a global flag would need to be tested
> > more than once.
> > 
> > I guess it would work if a symbol is associated with a single
> > definition. e.g. if there is a DEFINE_COND_CALL() somewhere
> > and the individual cond calls reference it.
> 
> Yes, but as you have probably understood, I want to have everything
> embedded at the cond_call() site rather than polluting the rest of the
> code with declarations.

And you do so at the expense of the ability to have compile-time
checks, and with the need to jump through a hash table at run-time. This
doesn't seem like a good trade-off.

Even if we -don't- do something like DEFINE_COND_CALL, it's still
probably a good idea to not use raw strings inline and to instead use
#defines. Raw strings are only slightly better than magic numbers.

> Also, if we have the same cond_calls in different modules, in which
> module shall it be defined ?

This isn't a new problem. It exists for every other type of object in
the kernel.

-- 
Mathematics is the supreme nostalgia of our time.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 2/9] Conditional Calls - Hash Table
  2007-06-01 18:06             ` Mathieu Desnoyers
  2007-06-01 18:49               ` Matt Mackall
@ 2007-06-01 19:35               ` Andi Kleen
  2007-06-01 20:33                 ` Mathieu Desnoyers
  1 sibling, 1 reply; 57+ messages in thread
From: Andi Kleen @ 2007-06-01 19:35 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: Andi Kleen, Matt Mackall, akpm, linux-kernel

> Yes, but as you have probably understood, I want to have everything
> embedded at the cond_call() site rather than polluting the rest of the
> code with declarations.

A cond call is essentially a fancy variable. And the Linux kernel
is written in C and in C you declare variables before you use them.
Also it would allow compile time checking against typos and 
allow removing some nasty hash table code. The proposal sounds like a 
clear winner to me. 

> Also, if we have the same cond_calls in different modules, in which
> module shall it be defined ?

In one of them. Like all other variables in C.

-Andi

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 2/9] Conditional Calls - Hash Table
  2007-06-01 19:35               ` Andi Kleen
@ 2007-06-01 20:33                 ` Mathieu Desnoyers
  2007-06-01 20:44                   ` Andi Kleen
  0 siblings, 1 reply; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-06-01 20:33 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Matt Mackall, akpm, linux-kernel

* Andi Kleen (andi@firstfloor.org) wrote:
> > Yes, but as you have probably understood, I want to have everything
> > embedded at the cond_call() site rather than polluting the rest of the
> > code with declarations.
> 
> A cond call is essentially a fancy variable. And the Linux kernel
> is written in C and in C you declare variables before you use them.
> Also it would allow compile time checking against typos and 
> allow removing some nasty hash table code. The proposal sounds like a 
> clear winner to me. 
> 

You could not declare in advance a structure that would contain pointers
to every load immediate instruction of the optimized cond_calls. Unless
I'm wrong, this idea just can't be implemented in C.

> > Also, if we have the same cond_calls in different modules, in which
> > module shall it be defined ?
> 
> In one of them. Like all other variables in C.
> 

I understand that if we limit ourselves to applications like the two
toy examples I proposed (enabling profiling and bug fixups), it could
make sense to try to declare a variable somewhere and later use it in
the body of functions (except the fact that it cannot work, due to
the inability to declare pointers to each load immediate instruction, as
stated above). Even if it would work, the main purpose here is to
support the Linux Kernel Markers, where the goal is to provide the
ability to declare a marker within the body of a function without
requiring more hassle than a printk, but with less performance impact
than the latter. Also, we would not want the whole kernel to recompile
simply because someone chose to add one or two markers in his own driver
to extract some more information and had to add them to some globally
included header file.

Therefore, due to the use of cond_calls within markers, I strongly
prefer the dynamic implementation I proposed to implementations
requiring additional variable declaration and frequent header changes.

Mathieu


-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 2/9] Conditional Calls - Hash Table
  2007-06-01 20:33                 ` Mathieu Desnoyers
@ 2007-06-01 20:44                   ` Andi Kleen
  2007-06-04 22:26                     ` Mathieu Desnoyers
  0 siblings, 1 reply; 57+ messages in thread
From: Andi Kleen @ 2007-06-01 20:44 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: Andi Kleen, Matt Mackall, akpm, linux-kernel

On Fri, Jun 01, 2007 at 04:33:06PM -0400, Mathieu Desnoyers wrote:
> * Andi Kleen (andi@firstfloor.org) wrote:
> > > Yes, but as you have probably understood, I want to have everything
> > > embedded at the cond_call() site rather than polluting the rest of the
> > > code with declarations.
> > 
> > A cond call is essentially a fancy variable. And the Linux kernel
> > is written in C and in C you declare variables before you use them.
> > Also it would allow compile time checking against typos and 
> > allow removing some nasty hash table code. The proposal sounds like a 
> > clear winner to me. 
> > 
> 
> You could not declare in advance a structure that would contain pointers
> to every load immediate instruction of the optimized cond_calls. Unless

To find them you just walk the sections.  Changing cond call is a slow
path operation. That is similar to how the smp lock switching
works today.

> I understand that if we limit ourselves to applications like the two
> toy examples I proposed (enabling profiling and bug fixups), it could
> make sense to try to declare a variable somewhere and later use it in
> the body of functions (except the fact that it cannot work, due to
> incapacity to declare pointers to each load immediate instruction, as
> stated above). Even if it would work, the main purpose here is to
> support the Linux Kernel Markers, where the goal is to provide the
> ability to declare a marker within the body of a function without
> requiring more hassle than a printk, but with less performance impact
> than the latter. Also, we would not want the whole kernel to recompile
> simply because someone chose to add one or two marker in his own driver
> to extract some more information and had to add them to some globally
> included header file.

Sounds similar to config.h then when Kconfig keeps track of those
dependencies for the CONFIG_*s and only recompiles what is needed. Perhaps 
this infrastructure could be reused.

-Andi  

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 1/9] Conditional Calls - Architecture Independent Code
  2007-05-30 14:00 ` [patch 1/9] Conditional Calls - Architecture Independent Code Mathieu Desnoyers
  2007-05-30 20:32   ` Andrew Morton
  2007-05-31 13:47   ` Andi Kleen
@ 2007-06-04 19:01   ` Adrian Bunk
  2007-06-13 15:57     ` Mathieu Desnoyers
  2 siblings, 1 reply; 57+ messages in thread
From: Adrian Bunk @ 2007-06-04 19:01 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: akpm, linux-kernel

On Wed, May 30, 2007 at 10:00:26AM -0400, Mathieu Desnoyers wrote:
> Conditional calls are used to compile in code that is meant to be dynamically
> enabled at runtime. When code is disabled, it has a very small footprint.
> 
> It has a generic cond_call version and optimized per architecture cond_calls.
> The optimized cond_call uses a load immediate to remove a data cache hit.
> 
> It adds a new rodata section "__cond_call" to place the pointers to the enable
> value.
>...

I have two questions for getting the bigger picture:

1. How much code will be changed?
Looking at the F00F bug fixup example, it seems we'll have to make
several functions in every single driver conditional in the kernel for 
getting the best performance.
How many functions to you plan to make conditional this way?

2. What is the real-life performance improvement?
That micro benchmarks comparing cache hits with cache misses give great 
looking numbers is obvious.
But what will be the performance improvement in real workloads after the
functions you plan to make conditional according to question 1 have been 
made conditional?

TIA
Adrian

-- 

       "Is there not promise of rain?" Ling Tan asked suddenly out
        of the darkness. There had been need of rain for many days.
       "Only a promise," Lao Er said.
                                       Pearl S. Buck - Dragon Seed


^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 9/9] Scheduler profiling - Use conditional calls
  2007-05-31 22:33       ` Andi Kleen
@ 2007-06-04 22:20         ` Mathieu Desnoyers
  0 siblings, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-06-04 22:20 UTC (permalink / raw)
  To: Andi Kleen, Nicholas Mc Guire; +Cc: akpm, linux-kernel

* Andi Kleen (andi@firstfloor.org) wrote:
> > I see your point, but there is a level of control on the branch I would
> > lack by doing so: the ability to put the call in either the if or else
> > branch. It is an optimization on i386.
> 
> What does it optimize exactly?
> 

Nicholas McGuire told me that the non common cases should be put in
else branches of if statements for i386. At the time, I did a quick test
that correlated what he said, but I seem to be unable to reproduce this
behavior now (maybe my code snippet is too simple?): I will then assume
that the likely/unlikely (builtin expects) tells everything that is
needed to gcc until further notice. Therefore, we can use the form :

if (cond_call(var)), as you proposed.

> > Also, I live in the expectation that, someday, the gcc guys will be nice
> > enough to add some kind of support for a nop-based jump that would
> > require code patching to put a jump instead. If it ever happens, my
> > macro could evolve into this for newer compiler versions, which I could
> > not do with the if() statement you are proposing.
> 
> If that ever happens we couldn't use it anyways because Linux still
> has to support old compilers for a long time. And when those are dropped the
> code could be updated.
> 

Agreed.

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 9/9] Scheduler profiling - Use conditional calls
  2007-05-31 23:41       ` William Lee Irwin III
@ 2007-06-04 22:20         ` Mathieu Desnoyers
  0 siblings, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-06-04 22:20 UTC (permalink / raw)
  To: William Lee Irwin III; +Cc: akpm, linux-kernel

* William Lee Irwin III (wli@holomorphy.com) wrote:
> On Wed, May 30, 2007 at 10:00:34AM -0400, Mathieu Desnoyers wrote:
> >>> +	if (prof_on)
> >>> +		BUG_ON(cond_call_arm("profile_on"));
> 
> * William Lee Irwin III (wli@holomorphy.com) wrote:
> >> What's the point of this BUG_ON()? The condition is a priori impossible.
> 
> On Thu, May 31, 2007 at 05:12:58PM -0400, Mathieu Desnoyers wrote:
> > Not impossible: hash_add_cond_call() can return -ENOMEM if kmalloc lacks
> > memory.
> 
> Shouldn't it just propagate the errors like anything else instead of
> going BUG(), then? One can easily live without profiling if the profile
> buffers should fail to be allocated e.g. due to memory fragmentation.
> 
> These things all have to handle errors for hotplugging anyway AIUI.
> 

Cond call arming will not reserve memory anymore in the next release.
(hash table is gone).

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 2/9] Conditional Calls - Hash Table
  2007-06-01 20:44                   ` Andi Kleen
@ 2007-06-04 22:26                     ` Mathieu Desnoyers
  0 siblings, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-06-04 22:26 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Matt Mackall, akpm, linux-kernel

* Andi Kleen (andi@firstfloor.org) wrote:
> On Fri, Jun 01, 2007 at 04:33:06PM -0400, Mathieu Desnoyers wrote:
> > * Andi Kleen (andi@firstfloor.org) wrote:
> > > > Yes, but as you have probably understood, I want to have everything
> > > > embedded at the cond_call() site rather than polluting the rest of the
> > > > code with declarations.
> > > 
> > > A cond call is essentially a fancy variable. And the Linux kernel
> > > is written in C and in C you declare variables before you use them.
> > > Also it would allow compile time checking against typos and 
> > > allow removing some nasty hash table code. The proposal sounds like a 
> > > clear winner to me. 
> > > 
> > 
> > You could not declare in advance a structure that would contain pointers
> > to every load immediate instruction of the optimized cond_calls. Unless
> 
> To find them you just walk the sections.  Changing cond call is a slow
> path operation. That is similar to how the smp lock switching
> works today.
> 
> > I understand that if we limit ourselves to applications like the two
> > toy examples I proposed (enabling profiling and bug fixups), it could
> > make sense to try to declare a variable somewhere and later use it in
> > the body of functions (except the fact that it cannot work, due to
> > incapacity to declare pointers to each load immediate instruction, as
> > stated above). Even if it would work, the main purpose here is to
> > support the Linux Kernel Markers, where the goal is to provide the
> > ability to declare a marker within the body of a function without
> > requiring more hassle than a printk, but with less performance impact
> > than the latter. Also, we would not want the whole kernel to recompile
> > simply because someone chose to add one or two marker in his own driver
> > to extract some more information and had to add them to some globally
> > included header file.
> 
> Sounds similar to config.h then when Kconfig keeps track of those
> dependencies for the CONFIG_*s and only recompiles what is needed. Perhaps 
> this infrastructure could be reused.
> 

I took time to think about your proposal, and it's all good. the
cond_calls will now depend on a variable address and feed a if()
statement.

If conditional calls CONFIG_* option is disabled, the variable (an
integer) will feed the if().

If enabled, the optimized version will feed the if() with the load
immediate. The generic version will give the integer variable to the if.
(I document that this variable should be declared __read_mostly).

The markers, however, will declare this variable by themselves in their
macro, so it is transparent to the programmer.

All this will be implemented in the next release. Thanks for your
insightful comments!

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 1/9] Conditional Calls - Architecture Independent Code
  2007-05-31 13:47   ` Andi Kleen
@ 2007-06-05 18:40     ` Mathieu Desnoyers
  0 siblings, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-06-05 18:40 UTC (permalink / raw)
  To: Andi Kleen; +Cc: akpm, linux-kernel

* Andi Kleen (andi@firstfloor.org) wrote:
> Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:
> 
> > +struct __cond_call_struct {
> 
> Calling structs *_struct is severly deprecated and will cause some people
> to make fun of your code.
> 

ok

> 
> > +	const char *name;
> > +	void *enable;
> > +	int flags;
> > +} __attribute__((packed));
> 
> The packed doesn't seem to be needed. There will be padding at 
> the end anyways because the next one needs to be aligned.
> 
ok

> > +
> > +
> > +/* Cond call flags : selects the mechanism used to enable the conditional calls
> > + * and prescribe what can be executed within their function. This is primarily
> > + * used at reentrancy-unfriendly sites. */
> > +#define CF_OPTIMIZED		(1 << 0) /* Use optimized cond_call */
> > +#define CF_LOCKDEP		(1 << 1) /* Can call lockdep */
> > +#define CF_PRINTK		(1 << 2) /* Probe can call vprintk */
> > +#define CF_STATIC_ENABLE	(1 << 3) /* Enable cond_call statically */
> 
> Why is that all needed?  Condcall shouldn't really need to know anything
> about all this. They're just a fancy conditional anyways -- and you don't
> tell if() that it may need to printk.
> 
> Please consider eliminating.
> 

I will remove the STATIC_ENABLE and the PRINTK, but I will leave the
CF_LOCKDEP and CF_OPTIMIZED there: they are required to let the generic
version be selected in contexts where a breakpoint cannot be used on
x86 (especially when placing a cond_call within lockdep.c code or any
code that could not afford to fall into a breakpoint handler).

> 
> 
> > +#define _CF_NR			4
> 
> 
> > +
> > +#ifdef CONFIG_COND_CALL
> > +
> > +/* Generic cond_call flavor always available.
> > + * Note : the empty asm volatile with read constraint is used here instead of a
> > + * "used" attribute to fix a gcc 4.1.x bug. */
> 
> What gcc 4.1 bug? 
> 

Please see

http://www.ecos.sourceware.org/ml/systemtap/2006-q4/msg00146.html

for Jeremy Fitzhardinge's comment on the issue. I will add some comments
in the code.

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 5/9] Conditional Calls - i386 Optimization
  2007-05-31 13:54   ` Andi Kleen
@ 2007-06-05 19:02     ` Mathieu Desnoyers
  0 siblings, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-06-05 19:02 UTC (permalink / raw)
  To: Andi Kleen; +Cc: akpm, linux-kernel

* Andi Kleen (andi@firstfloor.org) wrote:
> Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:
> 
> > +#define cond_call_optimized(flags, name, func) \
> > +	({ \
> > +		static const char __cstrtab_name_##name[] \
> > +		__attribute__((section("__cond_call_strings"))) = #name; \
> > +		char condition; \
> > +		asm (	".section __cond_call, \"a\", @progbits;\n\t" \
> > +					".long %1, 0f, %2;\n\t" \
> > +					".previous;\n\t" \
> > +					".align 2\n\t" \
> > +					"0:\n\t" \
> > +					"movb %3,%0;\n\t" \
> > +				: "=r" (condition) \
> > +				: "m" (*__cstrtab_name_##name), \
> > +				  "m" (*(char*)flags), \
> > +				  "i" ((flags) & CF_STATIC_ENABLE)); \
> 
> Remind me what we need the flags again for? 
> 
> I would prefer to just eliminate them and always require arming.
> 

The CF_STATIC_ENABLE could be used to have a cond_call compiled as
active, and could be later deactivated dynamically. Since I don't see
much use to this, I will remove this flag.


> 
> > +		(likely(!condition)) ? \
> > +			(__typeof__(func))0 : \
> > +			(func); \
> > +	})
> > +
> > +/* cond_call macro selecting the generic or optimized version of cond_call,
> > + * depending on the flags specified. */
> > +#define _cond_call(flags, name, func) \
> > +({ \
> > +	(((flags) & CF_LOCKDEP) && ((flags) & CF_OPTIMIZED)) ? \
> 
> Similar here? unoptimized condcalls don't make much sense.
> I also don't understand what the LOCKDEP flag is good for.
> 

unoptimized condcalls will now be a simple static variable usage (since
the new condcalls will simply be an infrastructure for synchronizing a
static variable with load immediates).


> > Index: linux-2.6-lttng/arch/i386/kernel/condcall.c
> > ===================================================================
> > --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> > +++ linux-2.6-lttng/arch/i386/kernel/condcall.c	2007-05-17 01:52:38.000000000 -0400
> > @@ -0,0 +1,140 @@
> > +/* condcall.c
> > + *
> > + * - Erratum 49 fix for Intel PIII.
> > + * - Still present on newer processors : Intel Core 2 Duo Processor for Intel
> > + *   Centrino Duo Processor Technology Specification Update, AH33.
> > + *   Unsynchronized Cross-Modifying Code Operations Can Cause Unexpected
> > + *   Instruction Execution Results.
> 
> Can you please define a generic utility function to do self modifying
> code? There are other places that do it too.
> 

If you are talking about paravirt ops, they seem to only modify code
on the 1st CPU before other CPUs are turned on. But I might be wrong.

Also, there are 2 simplifications here which do not address the general
case. Those simplifications are done so I can use the cond_calls within
the markers in as many code paths as possible.

1 - When the breakpoint is used (x86), I simply skip the load immediate
instead of single-stepping the instruction. I can do this because I know
that the only effect that the load immediate has is to populate a
register. Since I make sure that I am doing a transition 0 <-> !0, I can
afford leaving this register in an unknown state during the transition.

2 - In order to patch the code on other architectures too, I have to
provide atomic instruction modification. It heavily depends on the
instruction size and their alignment. Once again, I would have to use
breakpoints and single-stepping to replace the instructions (in every
architecture), instead of doing a simple operation replacement with
memcpy.

Because I control the alignment and size of the instructions I replace,
I do not depend on breakpoints (except on x86-like architectures), which
makes things much more solid reentrancy-wise and lets me use the
condcalls within the markers in tricky code paths.

> > +static DEFINE_MUTEX(cond_call_mutex);
> 
> All locks need a comment describing what they protect and the hierarchy if 
> any.
> 
ok

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 1/9] Conditional Calls - Architecture Independent Code
  2007-06-04 19:01   ` Adrian Bunk
@ 2007-06-13 15:57     ` Mathieu Desnoyers
  2007-06-13 21:51       ` Adrian Bunk
  0 siblings, 1 reply; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-06-13 15:57 UTC (permalink / raw)
  To: Adrian Bunk; +Cc: akpm, linux-kernel

Hi Adrian,

* Adrian Bunk (bunk@stusta.de) wrote:

> I have two questions for getting the bigger picture:
>
> 1. How much code will be changed?
> Looking at the F00F bug fixup example, it seems we'll have to make
> several functions in every single driver conditional in the kernel for
> getting the best performance.
> How many functions to you plan to make conditional this way?
>

I just changed the infrastructure to match Andi's advice : the
cond_calls are now "fancy" variables : they refer to a static variable
address, and every update (which must be done through the cond call API)
changes every load immediate referring to this variable. Therefore, they
can be simply embedded in a if(cond_call(var)) statement, so there is no
big code change to do.


> 2. What is the real-life performance improvement?
> That micro benchmarks comparing cache hits with cache misses give great 
> looking numbers is obvious.
> But what will be the performance improvement in real workloads after the
> functions you plan to make conditional according to question 1 have been 
> made conditional?
> 

Hrm, I am trying to get interesting numbers out of lmbench: I just ran a
test on a kernel sprinkled with about 50 markers at important sites
(LTTng markers: system call entry/exit, traps, interrupt handlers, ...).
The markers are compiled-in, but in "disabled state". Since the markers
re-use the cond_call infrastructure, each marker has its own cond_call.

I ran the test in two situations on my Pentium 4 box:

1 - Cond call optimizations are disabled. This is the equivalent of
using a global variable (in the kernel data) as a condition for the
branching.

2 - Cond call optimizations are enabled. It uses the load immediate
(which is now loading an integer on x86 instead of a char, to make sure
there is no pipeline stall due to false register dependency).

The results are that we really cannot tell that one is faster/slower
than the other; the standard deviation is much higher than the
difference between the two situations.

Note that lmbench is a workload that will not trigger much L1 cache
stress, since it repeats the same tests many times. Do you have any
suggestion of a test that would be more representative of a real
diversified (in term of in-kernel locality of reference) workload ?

Thanks,

Mathieu


> TIA
> Adrian
> 
> -- 
> 
>        "Is there not promise of rain?" Ling Tan asked suddenly out
>         of the darkness. There had been need of rain for many days.
>        "Only a promise," Lao Er said.
>                                        Pearl S. Buck - Dragon Seed
> 

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 1/9] Conditional Calls - Architecture Independent Code
  2007-06-13 15:57     ` Mathieu Desnoyers
@ 2007-06-13 21:51       ` Adrian Bunk
  2007-06-14 16:02         ` Mathieu Desnoyers
  0 siblings, 1 reply; 57+ messages in thread
From: Adrian Bunk @ 2007-06-13 21:51 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: akpm, linux-kernel

On Wed, Jun 13, 2007 at 11:57:24AM -0400, Mathieu Desnoyers wrote:
> Hi Adrian,

Hi Mathieu,

>...
> > 2. What is the real-life performance improvement?
> > That micro benchmarks comparing cache hits with cache misses give great 
> > looking numbers is obvious.
> > But what will be the performance improvement in real workloads after the
> > functions you plan to make conditional according to question 1 have been 
> > made conditional?
> 
> Hrm, I am trying to get interesting numbers out of lmbench: I just ran a
> test on a kernel sprinkled with about 50 markers at important sites
> (LTTng markers: system call entry/exit, traps, interrupt handlers, ...).
> The markers are compiled-in, but in "disabled state". Since the markers
> re-use the cond_call infrastructure, each marker has its own cond_call.
>...
> The results are that we really cannot tell that one is faster/slower
> than the other; the standard deviation is much higher than the
> difference between the two situations.
> 
> Note that lmbench is a workload that will not trigger much L1 cache
> stress, since it repeats the same tests many times. Do you have any
> suggestion of a test that would be more representative of a real
> diversified (in term of in-kernel locality of reference) workload ?

Please correct me if I'm wrong, but I think 50 markers couldn't ever 
result in a visible change:

You need a change that is big enough that it has a measurable influence 
on the cache hit ratio.

I don't think you could get any measurable influence unless you get into 
areas where > 10% of all code are conditional. And that's a percentage 
I wouldn't consider realistic.

And one big disadvantage of your implementation is the dependency on 
MODULES. If you build all driver statically into the kernel, switching 
from CONFIG_MODULES=y to CONFIG_MODULES=n already gives you for free a 
functionally equivalent kernel that is smaller by at about 8% (depending 
on the .config).

My impression is that your patches would add an infrastructure for a 
nice sounding idea that will never have any real life effect.

> Thanks,
> 
> Mathieu

cu
Adrian

-- 

       "Is there not promise of rain?" Ling Tan asked suddenly out
        of the darkness. There had been need of rain for many days.
       "Only a promise," Lao Er said.
                                       Pearl S. Buck - Dragon Seed


^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 1/9] Conditional Calls - Architecture Independent Code
  2007-06-13 21:51       ` Adrian Bunk
@ 2007-06-14 16:02         ` Mathieu Desnoyers
  2007-06-14 21:06           ` Adrian Bunk
  0 siblings, 1 reply; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-06-14 16:02 UTC (permalink / raw)
  To: Adrian Bunk; +Cc: akpm, linux-kernel

* Adrian Bunk (bunk@stusta.de) wrote:
> On Wed, Jun 13, 2007 at 11:57:24AM -0400, Mathieu Desnoyers wrote:
> > Hi Adrian,
> 
> Hi Mathieu,
> 
> >...
> > > 2. What is the real-life performance improvement?
> > > That micro benchmarks comparing cache hits with cache misses give great 
> > > looking numbers is obvious.
> > > But what will be the performance improvement in real workloads after the
> > > functions you plan to make conditional according to question 1 have been 
> > > made conditional?
> > 
> > Hrm, I am trying to get interesting numbers out of lmbench: I just ran a
> > test on a kernel sprinkled with about 50 markers at important sites
> > (LTTng markers: system call entry/exit, traps, interrupt handlers, ...).
> > The markers are compiled-in, but in "disabled state". Since the markers
> > re-use the cond_call infrastructure, each marker has its own cond_call.
> >...
> > The results are that we really cannot tell that one is faster/slower
> > than the other; the standard deviation is much higher than the
> > difference between the two situations.
> > 
> > Note that lmbench is a workload that will not trigger much L1 cache
> > stress, since it repeats the same tests many times. Do you have any
> > suggestion of a test that would be more representative of a real
> > diversified (in term of in-kernel locality of reference) workload ?
> 
> Please correct me if I'm wrong, but I think 50 markers couldn't ever 
> result in a visible change:
> 

Well, we must take into account where these markers are added and how
often the marked code is run. Since I mark very highly used code paths
(interrupt handlers, page faults, lockdep code) and also plan to mark
other code paths like the VM subsystem, adding cycles to these code
paths seems like a no-go solution for standard distribution kernels.

> You need a change that is big enough that it has a measurable influence 
> on the cache hit ratio.
> 
> I don't think you could get any measurable influence unless you get into 
> areas where > 10% of all code are conditional. And that's a percentage 
> I wouldn't consider being realistically.
> 

I just constructed a simple workload that exacerbates the improvement
brought by the optimized conditional calls:

- I instrument kernel/irq/handle.c:handle_IRQ_event() by disabling
  interrupts, getting 2 cycle counter counts and incrementing the number
  of events logged by 1 and then reenabling interrupts.
- I create a small userspace program that writes to 1MB memory buffers
  in a loop, simulating a memory bound user-space workload.
- I get the avg. number of cycles spent per IRQ between the cycle
  counter reads.
- I put 4 markers in kernel/irq/handle.c:handle_IRQ_event() between the
  cycles counter reads.
- I get the avg number of cycles with immediate value based markers and
  with static variable based markers, under an idle system and while
  running my user-space program causing memory pressure. Markers are in
  their disabled state.

These tests are conducted on a 3Ghz Pentium 4.

Results : (units are in cycles/interrupt)

Test                          | Idle system | With memory pressure
---------------------------------------------------------------------
Markers compiled out          | 100.47      | 100.27
Immediate value-based markers | 100.22      | 100.16
Static variable-based markers | 100.71      | 105.84

It shows that adding 4 markers does not add a visible impact to this
code path, but that using static variable-based markers adds 5 cycles.
Typical interrupt handler duration, taken from a LTTng trace, are in the
13k cycles range, so we guess 5 cycles does not add much externally
visible impact to this code path. However, if we plan to use markers to
instrument the VM subsystem, lockdep or, like dtrace, every function
entry/exit, it could be worthwhile to have an unmeasurable effect on
performance.

> And one big disadvantage of your implementation is the dependency on 
> MODULES. If you build all driver statically into the kernel, switching 
> from CONFIG_MODULES=y to CONFIG_MODULES=n already gives you for free a 
> functionally equivalent kernel that is smaller by at about 8% (depending 
> on the .config).
> 

Yes, you are right. I have put my code in module.c only because I need
to take a lock on module_mutex which is statically declared in module.c.
If declaring this lock globally could be considered as an elegant
solution, then I'll be more than happy to put my code in my own new
kernel/condcall.c file. I would remove the dependency on CONFIG_MODULES.
Does it make sense ?

> My impression is that your patches would add an infrastructure for a 
> nice sounding idea that will never have any real life effect.
> 

People can get really picky when they have to decide whether or not they
compile-in a profiling or tracing infrastructure in a distribution
kernel.  If the impact is detectable when they are not doing any tracing
nor profiling, their reflex will be to compile it out so they can have
the "maximum performance". This is why I am going through the trouble of
making the markers impact as small as possible.

Mathieu


> > Thanks,
> > 
> > Mathieu
> 
> cu
> Adrian
> 
> -- 
> 
>        "Is there not promise of rain?" Ling Tan asked suddenly out
>         of the darkness. There had been need of rain for many days.
>        "Only a promise," Lao Er said.
>                                        Pearl S. Buck - Dragon Seed
> 

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 1/9] Conditional Calls - Architecture Independent Code
  2007-06-14 16:02         ` Mathieu Desnoyers
@ 2007-06-14 21:06           ` Adrian Bunk
  2007-06-20 21:59             ` Mathieu Desnoyers
  0 siblings, 1 reply; 57+ messages in thread
From: Adrian Bunk @ 2007-06-14 21:06 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: akpm, linux-kernel

On Thu, Jun 14, 2007 at 12:02:42PM -0400, Mathieu Desnoyers wrote:
>...
> Well, we must take into account where these markers are added and how
> often the marked code is run. Since I mark very highly used code paths
> (interrupt handlers, page faults, lockdep code) and also plan to mark
> other code paths like the VM subsystem, adding cycles to these code
> paths seems like a no-go solution for standard distribution kernels.
>...
> People can get really picky when they have to decide wether or not they
> compile-in a profiling or tracing infrastructure in a distribution
> kernel.  If the impact is detectable when they are not doing any tracing
> nor profiling, their reflex will be to compile it out so they can have
> the "maximum performance". This is why I am going through the trouble of
> making the markers impact as small as possible.

Now that we finally hear what this code is required for, can we discuss 
on this basis whether this is wanted and required?

Including the question which abuse possibilities such an infrastructure 
offers, and whether this is wanted in distribution kernels.

> Mathieu

cu
Adrian

-- 

       "Is there not promise of rain?" Ling Tan asked suddenly out
        of the darkness. There had been need of rain for many days.
       "Only a promise," Lao Er said.
                                       Pearl S. Buck - Dragon Seed


^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 1/9] Conditional Calls - Architecture Independent Code
  2007-06-14 21:06           ` Adrian Bunk
@ 2007-06-20 21:59             ` Mathieu Desnoyers
  2007-06-21 13:00               ` Adrian Bunk
  0 siblings, 1 reply; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-06-20 21:59 UTC (permalink / raw)
  To: Adrian Bunk; +Cc: akpm, linux-kernel

* Adrian Bunk (bunk@stusta.de) wrote:
> On Thu, Jun 14, 2007 at 12:02:42PM -0400, Mathieu Desnoyers wrote:
> >...
> > Well, we must take into account where these markers are added and how
> > often the marked code is run. Since I mark very highly used code paths
> > (interrupt handlers, page faults, lockdep code) and also plan to mark
> > other code paths like the VM subsystem, adding cycles to these code
> > paths seems like a no-go solution for standard distribution kernels.
> >...
> > People can get really picky when they have to decide wether or not they
> > compile-in a profiling or tracing infrastructure in a distribution
> > kernel.  If the impact is detectable when they are not doing any tracing
> > nor profiling, their reflex will be to compile it out so they can have
> > the "maximum performance". This is why I am going through the trouble of
> > making the markers impact as small as possible.
> 
> Now that we finally hear what this code is required for, can we discuss 
> on this basis whether this is wanted and required?
> 
> Including the question which abuse possibilities such an infrastructure 
> offers, and whether this is wanted in distribution kernels.
> 

Hi Adrian,

The purpose of this infrastructure has never been a secret; it is a
piece taken from the Linux Kernel Markers. I proposed the first
implementation of markers in December 2006.

Please use the following link as a starting point to the thorough
discussion that has already been held on this matter.

First, a huge discussion thread back in November 2006, where the need
for a marker infrastructure has been recognized:
http://lwn.net/Articles/200059/

A good summary of my recent previous post on kerneltrap:
http://kerneltrap.org/node/8186

If you have new specific concerns to bring forward, I will be glad to
discuss them with you.

Regards,

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 1/9] Conditional Calls - Architecture Independent Code
  2007-06-20 21:59             ` Mathieu Desnoyers
@ 2007-06-21 13:00               ` Adrian Bunk
  2007-06-21 13:55                 ` Mathieu Desnoyers
  0 siblings, 1 reply; 57+ messages in thread
From: Adrian Bunk @ 2007-06-21 13:00 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: akpm, linux-kernel

On Wed, Jun 20, 2007 at 05:59:27PM -0400, Mathieu Desnoyers wrote:
> * Adrian Bunk (bunk@stusta.de) wrote:
> > On Thu, Jun 14, 2007 at 12:02:42PM -0400, Mathieu Desnoyers wrote:
> > >...
> > > Well, we must take into account where these markers are added and how
> > > often the marked code is run. Since I mark very highly used code paths
> > > (interrupt handlers, page faults, lockdep code) and also plan to mark
> > > other code paths like the VM subsystem, adding cycles to these code
> > > paths seems like a no-go solution for standard distribution kernels.
> > >...
> > > People can get really picky when they have to decide wether or not they
> > > compile-in a profiling or tracing infrastructure in a distribution
> > > kernel.  If the impact is detectable when they are not doing any tracing
> > > nor profiling, their reflex will be to compile it out so they can have
> > > the "maximum performance". This is why I am going through the trouble of
> > > making the markers impact as small as possible.
> > 
> > Now that we finally hear what this code is required for, can we discuss 
> > on this basis whether this is wanted and required?
> > 
> > Including the question which abuse possibilities such an infrastructure 
> > offers, and whether this is wanted in distribution kernels.
> 
> Hi Adrian,

Hi Mathieu,

> The purpose of this infrastructure has never been a secret; it is a
> piece taken from the Linux Kernel Markers. I proposed the first
> implementation of markers in December 2006.
> 
> Please use the following link as a starting point to the thorough
> discussion that has already been held on this matter.
> 
> First, a huge discussion thread back in November 2006, where the need
> for a marker infrastructure has been recognized:
> http://lwn.net/Articles/200059/
> 
> A good summary of my recent previous post on kerneltrap:
> http://kerneltrap.org/node/8186
> 
> If you have new specific concerns to bring forward, I will be glad to
> discuss them with you.

sorry if I was a bit harsh, but at least for me it wasn't clear that the 
main (and perhaps only) reasonable use case for your conditional calls 
was to get markers enabled in distribution kernels.

Please correct me, but my understanding is:
- conditional calls aim at getting markers enabled in distribution
  kernels
- markers are a valuable debugging tool
- you don't need markers during normal operation
- markers allow normal modules doing things they shouldn't be doing,
  implying that markers should _not_ be enabled in normal distribution
  kernels

> Regards,
> 
> Mathieu

cu
Adrian

-- 

       "Is there not promise of rain?" Ling Tan asked suddenly out
        of the darkness. There had been need of rain for many days.
       "Only a promise," Lao Er said.
                                       Pearl S. Buck - Dragon Seed


^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [patch 1/9] Conditional Calls - Architecture Independent Code
  2007-06-21 13:00               ` Adrian Bunk
@ 2007-06-21 13:55                 ` Mathieu Desnoyers
  0 siblings, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-06-21 13:55 UTC (permalink / raw)
  To: Adrian Bunk; +Cc: akpm, linux-kernel, mbligh

* Adrian Bunk (bunk@stusta.de) wrote:
> On Wed, Jun 20, 2007 at 05:59:27PM -0400, Mathieu Desnoyers wrote:
> > * Adrian Bunk (bunk@stusta.de) wrote:
> > > On Thu, Jun 14, 2007 at 12:02:42PM -0400, Mathieu Desnoyers wrote:
> > > >...
> > > > Well, we must take into account where these markers are added and how
> > > > often the marked code is run. Since I mark very highly used code paths
> > > > (interrupt handlers, page faults, lockdep code) and also plan to mark
> > > > other code paths like the VM subsystem, adding cycles to these code
> > > > paths seems like a no-go solution for standard distribution kernels.
> > > >...
> > > > People can get really picky when they have to decide wether or not they
> > > > compile-in a profiling or tracing infrastructure in a distribution
> > > > kernel.  If the impact is detectable when they are not doing any tracing
> > > > nor profiling, their reflex will be to compile it out so they can have
> > > > the "maximum performance". This is why I am going through the trouble of
> > > > making the markers impact as small as possible.
> > > 
> > > Now that we finally hear what this code is required for, can we discuss 
> > > on this basis whether this is wanted and required?
> > > 
> > > Including the question which abuse possibilities such an infrastructure 
> > > offers, and whether this is wanted in distribution kernels.
> > 
> > Hi Adrian,
> 
> Hi Mathieu,
> 
> > The purpose of this infrastructure has never been a secret; it is a
> > piece taken from the Linux Kernel Markers. I proposed the first
> > implementation of markers in December 2006.
> > 
> > Please use the following link as a starting point to the thorough
> > discussion that has already been held on this matter.
> > 
> > First, a huge discussion thread back in November 2006, where the need
> > for a marker infrastructure has been recognized:
> > http://lwn.net/Articles/200059/
> > 
> > A good summary of my recent previous post on kerneltrap:
> > http://kerneltrap.org/node/8186
> > 
> > If you have new specific concerns to bring forward, I will be glad to
> > discuss them with you.
> 
> sorry if I was a bit harsh, but at least for me it wasn't clear that the 
> main (and perhaps only) reasonable use case for your conditional calls 
> was to get markers enabled in distribution kernels.
> 
> Please correct me, but my understanding is:
> - conditional calls aim at getting markers enabled in distribution
>   kernels

Hi Adrian,

Putting markers in a distribution kernel would be of great benefit to
many users, this is true. The issue is not whether distros use it or
not, but whether we can allow users, poweruser to senior kernel
developer, even if they compile their own kernel, to turn on a tracing
infrastructure on-the-fly to study a problem in their system (problem
coming either from user-space, kernel, hypervisor...).

The key aspect here is to provide instrumentation of every privilege
level.

> - markers are a valuable debugging tool

If ptrace() is also called a valuable debugging tool, then yes. Does it
make it less suitable for distributions ? The main goal here is not to
be used as a debugging tool by kernel developers, this is just a nice
side-effect.  The main purpose of this tracer is to give an overall view
of the system activity to help _userspace_ developers determine what is
going wrong with the complex interactions between their multithreaded
application, X server and network sockets running 8 CPUs and using a
distributed FS...  the more interaction we have between (many) processes
and the OS, the harder it becomes to study that with the traditional
ptrace() approach. When looking for the cause of a slowdown, people
often just do not know even _which_ process is guilty for the problem,
or if they should blame the kernel.


> - you don't need markers during normal operation

False. Considering that application development is part of what I call
"normal operation", these tools are needed not just as part of a
particular kernel debug option.

Moreover, we have some stories ready (see upcoming Martin Bligh's
presentation/paper next week at OLS2007 "Linux Kernel Debugging on
Google-Sized Clusters") showing that some nasty problems a reproducible
so rarely, and only when monitored on such a great number of machines,
that they become nearly impossible to track without a preexisting
tracing infrastructure deployed on production machines.


> - markers allow normal modules doing things they shouldn't be doing,
>   implying that markers should _not_ be enabled in normal distribution
>   kernels
> 

Not exactly.

1 - Markers are flexible enough to permit defining a custom probe that
can be loaded dynamically as a module, but the "standard" probe will be
coming in some of my awaiting patches : it parses the format string to
serialize the information into trace buffers. Therefore, the "standard"
marker usage won't require people to write their own module; just to
enable which marker they want.

2 - kprobes is an example of an infrastructure enabled in distribution
kernels (Redhat at least) that permits calling modules from arbitrary
breakpoints. Markers somewhat limits this by specifying where the
markers are, therefore making the mechanism faster, offers better
reentrancy characteristics and is better suited to a kernel tree in
constant evolution (markers follow the kernel code and are not
maintained as a separate mapping to fixed kernel symbols/addresses).

3 - Providing a probe implies using a function that is exported as
*_GPL(). What exactly is such a module not supposed to be doing? Are
you expressing some kind of twisted pseudo-security concern? Once your
are executing code in kernel space with write access in memory, you can
do pretty much anything. I think that the right way to do it is to
provide the flexibility required to help users do what they need to do
(along with guidelines to help them not shoot themselves in the foot),
but not _restrict_ what is exported to GPL modules just because one
_can_ shoot himself in the foot. Since when are we supposed to provide
protection against modules? Should they run in user-space?

Regards,

Mathieu


> > Regards,
> > 
> > Mathieu
> 
> cu
> Adrian
> 
> -- 
> 
>        "Is there not promise of rain?" Ling Tan asked suddenly out
>         of the darkness. There had been need of rain for many days.
>        "Only a promise," Lao Er said.
>                                        Pearl S. Buck - Dragon Seed
> 

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [patch 1/9] Conditional Calls - Architecture Independent Code
  2007-05-29 18:33 [patch 0/9] Conditional Calls Mathieu Desnoyers
@ 2007-05-29 18:33 ` Mathieu Desnoyers
  0 siblings, 0 replies; 57+ messages in thread
From: Mathieu Desnoyers @ 2007-05-29 18:33 UTC (permalink / raw)
  To: akpm, linux-kernel; +Cc: Mathieu Desnoyers

[-- Attachment #1: conditional-calls-architecture-independent-code.patch --]
[-- Type: text/plain, Size: 14948 bytes --]

Conditional calls are used to compile in code that is meant to be dynamically
enabled at runtime. When code is disabled, it has a very small footprint.

It has a generic cond_call version and optimized per architecture cond_calls.
The optimized cond_call uses a load immediate to remove a data cache hit.

It adds a new rodata section "__cond_call" to place the pointers to the enable
value.

cond_call activation functions sit in module.c.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
 include/asm-generic/vmlinux.lds.h |   16 +-
 include/linux/condcall.h          |   91 +++++++++++++
 include/linux/module.h            |    4 
 kernel/module.c                   |  248 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 356 insertions(+), 3 deletions(-)

Index: linux-2.6-lttng/include/linux/condcall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/linux/condcall.h	2007-05-17 02:13:53.000000000 -0400
@@ -0,0 +1,91 @@
+#ifndef _LINUX_CONDCALL_H
+#define _LINUX_CONDCALL_H
+
+/*
+ * Conditional function calls. Cheap when disabled, enabled at runtime.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#ifdef __KERNEL__
+
+struct __cond_call_struct {
+	const char *name;
+	void *enable;
+	int flags;
+} __attribute__((packed));
+
+
+/* Cond call flags : selects the mechanism used to enable the conditional calls
+ * and prescribe what can be executed within their function. This is primarily
+ * used at reentrancy-unfriendly sites. */
+#define CF_OPTIMIZED		(1 << 0) /* Use optimized cond_call */
+#define CF_LOCKDEP		(1 << 1) /* Can call lockdep */
+#define CF_PRINTK		(1 << 2) /* Probe can call vprintk */
+#define CF_STATIC_ENABLE	(1 << 3) /* Enable cond_call statically */
+#define _CF_NR			4
+
+#ifdef CONFIG_COND_CALL
+
+/* Generic cond_call flavor always available.
+ * Note : the empty asm volatile with read constraint is used here instead of a
+ * "used" attribute to fix a gcc 4.1.x bug. */
+#define cond_call_generic(flags, name, func) \
+	({ \
+		static const char __cstrtab_name_##name[] \
+		__attribute__((section("__cond_call_strings"))) = #name; \
+		static char __cond_call_enable_##name = \
+			(flags) & CF_STATIC_ENABLE; \
+		static const struct __cond_call_struct __cond_call_##name \
+			__attribute__((section("__cond_call"))) = \
+			{ __cstrtab_name_##name, \
+			&__cond_call_enable_##name, \
+			(flags) & ~CF_OPTIMIZED } ; \
+		asm volatile ( "" : : "i" (&__cond_call_##name)); \
+		(unlikely(__cond_call_enable_##name)) ? \
+			(func) : \
+			(__typeof__(func))0; \
+	})
+
+#define COND_CALL_GENERIC_ENABLE_IMMEDIATE_OFFSET 0
+#define COND_CALL_GENERIC_ENABLE_TYPE unsigned char
+/* Dereference enable as lvalue from a pointer to its instruction */
+#define COND_CALL_GENERIC_ENABLE(a) \
+	*(COND_CALL_GENERIC_ENABLE_TYPE*) \
+		((char*)a+COND_CALL_GENERIC_ENABLE_IMMEDIATE_OFFSET)
+
+static inline int cond_call_generic_set_enable(void *address, char enable)
+{
+	COND_CALL_GENERIC_ENABLE(address) = enable;
+	return 0;
+}
+
+#else /* !CONFIG_COND_CALL */
+#define cond_call_generic(flags, name, func) \
+	({ \
+		((flags) & CF_STATIC_ENABLE) ? \
+			(func) : \
+			(__typeof__(func))0; \
+	})
+#endif /* CONFIG_COND_CALL */
+
+#ifdef CONFIG_COND_CALL_ENABLE_OPTIMIZATION
+#include <asm/condcall.h>		/* optimized cond_call flavor */
+#else
+#include <asm-generic/condcall.h>	/* fallback on generic cond_call */
+#endif
+
+#define COND_CALL_MAX_FORMAT_LEN	1024
+
+extern int cond_call_arm(const char *name);
+extern int cond_call_disarm(const char *name);
+
+/* cond_call_query : Returns 1 if enabled, 0 if disabled or not present */
+extern int cond_call_query(const char *name);
+extern int cond_call_list(const char *name);
+
+#endif /* __KERNEL__ */
+#endif
Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h	2007-05-17 02:12:25.000000000 -0400
+++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h	2007-05-17 02:13:42.000000000 -0400
@@ -116,11 +116,22 @@
 		*(__kcrctab_gpl_future)					\
 		VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .;	\
 	}								\
-									\
+	/* Conditional calls: pointers */				\
+	__cond_call : AT(ADDR(__cond_call) - LOAD_OFFSET) {		\
+		VMLINUX_SYMBOL(__start___cond_call) = .;		\
+		*(__cond_call)						\
+		VMLINUX_SYMBOL(__stop___cond_call) = .;			\
+	}								\
 	/* Kernel symbol table: strings */				\
         __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {	\
 		*(__ksymtab_strings)					\
-	}								\
+ 	}								\
+	/* Conditional calls: strings */				\
+        __cond_call_strings : AT(ADDR(__cond_call_strings) - LOAD_OFFSET) { \
+		*(__cond_call_strings)					\
+ 	}								\
+	__end_rodata = .;						\
+	. = ALIGN(4096);						\
 									\
 	/* Built-in module parameters. */				\
 	__param : AT(ADDR(__param) - LOAD_OFFSET) {			\
@@ -228,4 +239,3 @@
   	*(.initcall6s.init)						\
   	*(.initcall7.init)						\
   	*(.initcall7s.init)
-
Index: linux-2.6-lttng/include/linux/module.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/module.h	2007-05-17 02:12:34.000000000 -0400
+++ linux-2.6-lttng/include/linux/module.h	2007-05-17 02:13:41.000000000 -0400
@@ -16,6 +16,7 @@
 #include <linux/stringify.h>
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
+#include <linux/condcall.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -356,6 +357,9 @@
 	/* The command line arguments (may be mangled).  People like
 	   keeping pointers to this stuff */
 	char *args;
+
+	const struct __cond_call_struct *cond_calls;
+	unsigned int num_cond_calls;
 };
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
Index: linux-2.6-lttng/kernel/module.c
===================================================================
--- linux-2.6-lttng.orig/kernel/module.c	2007-05-17 02:12:25.000000000 -0400
+++ linux-2.6-lttng/kernel/module.c	2007-05-17 02:13:53.000000000 -0400
@@ -32,6 +32,7 @@
 #include <linux/cpu.h>
 #include <linux/moduleparam.h>
 #include <linux/errno.h>
+#include <linux/condcall.h>
 #include <linux/err.h>
 #include <linux/vermagic.h>
 #include <linux/notifier.h>
@@ -141,6 +142,8 @@
 extern const unsigned long __start___kcrctab_gpl_future[];
 extern const unsigned long __start___kcrctab_unused[];
 extern const unsigned long __start___kcrctab_unused_gpl[];
+extern const struct __cond_call_struct __start___cond_call[];
+extern const struct __cond_call_struct __stop___cond_call[];
 
 #ifndef CONFIG_MODVERSIONS
 #define symversion(base, idx) NULL
@@ -301,6 +304,230 @@
 	return NULL;
 }
 
+#ifdef CONFIG_COND_CALL
+
+/* Set the enable bit of the cond_call, choosing the generic or architecture
+ * specific functions depending on the cond_call's flags.
+ */
+static int cond_call_set_enable(void *address, char enable, int flags)
+{
+	if (flags & CF_OPTIMIZED)
+		return cond_call_optimized_set_enable(address, enable);
+	else
+		return cond_call_generic_set_enable(address, enable);
+}
+
+/* Query the state of a cond_calls range. */
+static int _cond_call_query_range(const char *name,
+	const struct __cond_call_struct *begin,
+	const struct __cond_call_struct *end)
+{
+	const struct __cond_call_struct *iter;
+	int ret = 0;
+
+	for (iter = begin; iter < end; iter++) {
+		if (strcmp(name, iter->name) != 0)
+			continue;
+		if (iter->flags & CF_OPTIMIZED)
+			ret = COND_CALL_OPTIMIZED_ENABLE(iter->enable);
+		else
+			ret = COND_CALL_GENERIC_ENABLE(iter->enable);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+/* Sets a range of cond_calls to a enabled state : set the enable bit. */
+static int _cond_call_arm_range(const char *name,
+	const struct __cond_call_struct *begin,
+	const struct __cond_call_struct *end)
+{
+	const struct __cond_call_struct *iter;
+	int found = 0;
+
+	for (iter = begin; iter < end; iter++) {
+		if (strcmp(name, iter->name) != 0)
+			continue;
+		cond_call_set_enable(iter->enable, 1, iter->flags);
+		found++;
+	}
+	return found;
+}
+
+/* Sets a range of cond_calls to a disabled state : unset the enable bit. */
+static int _cond_call_disarm_range(const char *name,
+	const struct __cond_call_struct *begin,
+	const struct __cond_call_struct *end)
+{
+	const struct __cond_call_struct *iter;
+	int found = 0;
+
+	for (iter = begin; iter < end; iter++) {
+		if (strcmp(name, iter->name) != 0)
+			continue;
+		cond_call_set_enable(iter->enable, 0, iter->flags);
+		found++;
+	}
+	return found;
+}
+
+/* Provides a listing of the cond_calls present in the kernel with their type
+ * (optimized or generic) and state (enabled or disabled). */
+static int _cond_call_list_range(const char *name,
+	const struct __cond_call_struct *begin,
+	const struct __cond_call_struct *end)
+{
+	const struct __cond_call_struct *iter;
+	int found = 0;
+
+	for (iter = begin; iter < end; iter++) {
+		if (name)
+			if (strcmp(name, iter->name) != 0)
+				continue;
+		printk("name %s \n", iter->name);
+		if (iter->flags & CF_OPTIMIZED)
+			printk("  enable %u optimized\n",
+				COND_CALL_OPTIMIZED_ENABLE(iter->enable));
+		else
+			printk("  enable %u generic\n",
+				COND_CALL_GENERIC_ENABLE(iter->enable));
+		found++;
+	}
+	return found;
+}
+
+/* Calls _cond_call_query_range for the core cond_calls and modules cond_calls.
+ */
+static int _cond_call_query(const char *name)
+{
+	struct module *mod;
+	int ret = 0;
+
+	/* Core kernel cond_calls */
+	ret = _cond_call_query_range(name,
+			__start___cond_call, __stop___cond_call);
+	if (ret)
+		return ret;
+	/* Cond_calls in modules. */
+	list_for_each_entry(mod, &modules, list) {
+		if (mod->taints)
+			continue;
+		ret = _cond_call_query_range(name,
+			mod->cond_calls, mod->cond_calls+mod->num_cond_calls);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+/* Cond_call enabling/disabling use the module_mutex to synchronize.
+ * Returns 1 if enabled, 0 if disabled or not present. */
+int cond_call_query(const char *name)
+{
+	int ret = 0;
+
+	mutex_lock(&module_mutex);
+	ret = _cond_call_query(name);
+	mutex_unlock(&module_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cond_call_query);
+
+/* Calls _cond_call_arm_range for the core cond_calls and modules cond_calls.
+ * FIXME : cond_call will not be active on modules loaded later than the
+ * cond_call_arm. */
+static int _cond_call_arm(const char *name)
+{
+	struct module *mod;
+	int found = 0;
+
+	/* Core kernel cond_calls */
+	found += _cond_call_arm_range(name,
+			__start___cond_call, __stop___cond_call);
+	/* Cond_calls in modules. */
+	list_for_each_entry(mod, &modules, list) {
+		if (mod->taints)
+			continue;
+		found += _cond_call_arm_range(name,
+			mod->cond_calls, mod->cond_calls+mod->num_cond_calls);
+	}
+	return found;
+}
+
+/* Cond_call enabling/disabling use the module_mutex to synchronize. */
+int cond_call_arm(const char *name)
+{
+	int found = 0;
+
+	mutex_lock(&module_mutex);
+	found = _cond_call_arm(name);
+	mutex_unlock(&module_mutex);
+	return found;
+}
+EXPORT_SYMBOL_GPL(cond_call_arm);
+
+/* Calls _cond_call_disarm_range for the core cond_calls and modules cond_calls.
+ */
+static int _cond_call_disarm(const char *name)
+{
+	struct module *mod;
+	int found = 0;
+
+	/* Core kernel cond_calls */
+	found += _cond_call_disarm_range(name,
+			__start___cond_call, __stop___cond_call);
+	/* Cond_calls in modules. */
+	list_for_each_entry(mod, &modules, list) {
+		if (mod->taints)
+			continue;
+		found += _cond_call_disarm_range(name,
+			mod->cond_calls, mod->cond_calls+mod->num_cond_calls);
+	}
+	return found;
+}
+
+/* Cond_call enabling/disabling use the module_mutex to synchronize. */
+int cond_call_disarm(const char *name)
+{
+	int found = 0;
+
+	mutex_lock(&module_mutex);
+	found = _cond_call_disarm(name);
+	mutex_unlock(&module_mutex);
+	return found;
+}
+EXPORT_SYMBOL_GPL(cond_call_disarm);
+
+/* Calls _cond_call_list_range for the core and module cond_calls.
+ * Cond call listing uses the module_mutex to synchronize.
+ * TODO : should output this listing to a procfs file. */
+int cond_call_list(const char *name)
+{
+	struct module *mod;
+	int found = 0;
+
+	mutex_lock(&module_mutex);
+	/* Core kernel cond_calls */
+	printk("Listing kernel cond_calls\n");
+	found += _cond_call_list_range(name,
+			__start___cond_call, __stop___cond_call);
+	/* Cond_calls in modules. */
+	printk("Listing module cond_calls\n");
+	list_for_each_entry(mod, &modules, list) {
+		if (!mod->taints) {
+			printk("Listing cond_calls for module %s\n", mod->name);
+			found += _cond_call_list_range(name, mod->cond_calls,
+				mod->cond_calls+mod->num_cond_calls);
+		}
+	}
+	mutex_unlock(&module_mutex);
+	return found;
+}
+EXPORT_SYMBOL_GPL(cond_call_list);
+
+#endif
+
 #ifdef CONFIG_SMP
 /* Number of blocks used and allocated. */
 static unsigned int pcpu_num_used, pcpu_num_allocated;
@@ -1658,6 +1885,8 @@
 	unsigned int unusedcrcindex;
 	unsigned int unusedgplindex;
 	unsigned int unusedgplcrcindex;
+	unsigned int condcallindex;
+	unsigned int condcallstringsindex;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1754,6 +1983,8 @@
 #ifdef ARCH_UNWIND_SECTION_NAME
 	unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME);
 #endif
+	condcallindex = find_sec(hdr, sechdrs, secstrings, "__cond_call");
+	condcallstringsindex = find_sec(hdr, sechdrs, secstrings, "__cond_call_strings");
 
 	/* Don't keep modinfo section */
 	sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -1764,6 +1995,18 @@
 #endif
 	if (unwindex)
 		sechdrs[unwindex].sh_flags |= SHF_ALLOC;
+#ifdef CONFIG_COND_CALL
+	if (condcallindex)
+		sechdrs[condcallindex].sh_flags |= SHF_ALLOC;
+	if (condcallstringsindex)
+		sechdrs[condcallstringsindex].sh_flags |= SHF_ALLOC;
+#else
+	if (condcallindex)
+		sechdrs[condcallindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
+	if (condcallstringsindex)
+		sechdrs[condcallstringsindex].sh_flags
+					&= ~(unsigned long)SHF_ALLOC;
+#endif
 
 	/* Check module struct version now, before we try to use module. */
 	if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -1904,6 +2147,11 @@
 	mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
 	if (gplfuturecrcindex)
 		mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
+	if (condcallindex) {
+		mod->cond_calls = (void *)sechdrs[condcallindex].sh_addr;
+		mod->num_cond_calls =
+			sechdrs[condcallindex].sh_size / sizeof(*mod->cond_calls);
+	}
 
 	mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
 	if (unusedcrcindex)

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 57+ messages in thread

end of thread, other threads:[~2007-06-21 13:56 UTC | newest]

Thread overview: 57+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-05-30 14:00 [patch 0/9] Conditional Calls - for 2.6.22-rc2-mm1 Mathieu Desnoyers
2007-05-30 14:00 ` [patch 1/9] Conditional Calls - Architecture Independent Code Mathieu Desnoyers
2007-05-30 20:32   ` Andrew Morton
2007-05-31 16:34     ` Mathieu Desnoyers
2007-05-31 13:47   ` Andi Kleen
2007-06-05 18:40     ` Mathieu Desnoyers
2007-06-04 19:01   ` Adrian Bunk
2007-06-13 15:57     ` Mathieu Desnoyers
2007-06-13 21:51       ` Adrian Bunk
2007-06-14 16:02         ` Mathieu Desnoyers
2007-06-14 21:06           ` Adrian Bunk
2007-06-20 21:59             ` Mathieu Desnoyers
2007-06-21 13:00               ` Adrian Bunk
2007-06-21 13:55                 ` Mathieu Desnoyers
2007-05-30 14:00 ` [patch 2/9] Conditional Calls - Hash Table Mathieu Desnoyers
2007-05-30 20:32   ` Andrew Morton
2007-05-31 13:42   ` Andi Kleen
2007-06-01 16:08     ` Matt Mackall
2007-06-01 16:46       ` Mathieu Desnoyers
2007-06-01 17:07         ` Matt Mackall
2007-06-01 17:45           ` Andi Kleen
2007-06-01 18:06             ` Mathieu Desnoyers
2007-06-01 18:49               ` Matt Mackall
2007-06-01 19:35               ` Andi Kleen
2007-06-01 20:33                 ` Mathieu Desnoyers
2007-06-01 20:44                   ` Andi Kleen
2007-06-04 22:26                     ` Mathieu Desnoyers
2007-06-01 18:03           ` Mathieu Desnoyers
2007-05-30 14:00 ` [patch 3/9] Conditional Calls - Non Optimized Architectures Mathieu Desnoyers
2007-05-30 14:00 ` [patch 4/9] Conditional Calls - Add kconfig menus Mathieu Desnoyers
2007-05-30 14:00 ` [patch 5/9] Conditional Calls - i386 Optimization Mathieu Desnoyers
2007-05-30 20:33   ` Andrew Morton
2007-05-31 13:54   ` Andi Kleen
2007-06-05 19:02     ` Mathieu Desnoyers
2007-05-30 14:00 ` [patch 6/9] Conditional Calls - PowerPC Optimization Mathieu Desnoyers
2007-05-30 14:00 ` [patch 7/9] Conditional Calls - Documentation Mathieu Desnoyers
2007-05-30 14:00 ` [patch 8/9] F00F bug fixup for i386 - use conditional calls Mathieu Desnoyers
2007-05-30 20:33   ` Andrew Morton
2007-05-31 21:07     ` Mathieu Desnoyers
2007-05-31 21:21       ` Andrew Morton
2007-05-31 21:38         ` Mathieu Desnoyers
2007-05-30 14:00 ` [patch 9/9] Scheduler profiling - Use " Mathieu Desnoyers
2007-05-30 20:34   ` Andrew Morton
2007-06-01 15:54     ` Mathieu Desnoyers
2007-06-01 16:19       ` Andrew Morton
2007-06-01 16:33         ` Mathieu Desnoyers
2007-05-30 20:59   ` William Lee Irwin III
2007-05-31 21:12     ` Mathieu Desnoyers
2007-05-31 23:41       ` William Lee Irwin III
2007-06-04 22:20         ` Mathieu Desnoyers
2007-05-30 21:44   ` Matt Mackall
2007-05-31 21:36     ` Mathieu Desnoyers
2007-05-31 13:39   ` Andi Kleen
2007-05-31 22:07     ` Mathieu Desnoyers
2007-05-31 22:33       ` Andi Kleen
2007-06-04 22:20         ` Mathieu Desnoyers
  -- strict thread matches above, loose matches on Subject: below --
2007-05-29 18:33 [patch 0/9] Conditional Calls Mathieu Desnoyers
2007-05-29 18:33 ` [patch 1/9] Conditional Calls - Architecture Independent Code Mathieu Desnoyers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).