linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch 0/5] RFC: fault-injection capabilities
@ 2006-08-23 11:32 Akinobu Mita
  2006-08-23 11:32 ` [patch 1/5] fail-injection library Akinobu Mita
                   ` (7 more replies)
  0 siblings, 8 replies; 20+ messages in thread
From: Akinobu Mita @ 2006-08-23 11:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, okuji

This patch set provides some fault-injection capabilities.

- kmalloc failures

- alloc_pages() failures

- disk IO errors

We can see what really happens if those failures happen.

In order to enable these fault-injection capabilities:

1. Enable relevant config options (CONFIG_FAILSLAB, CONFIG_FAIL_PAGE_ALLOC,
   CONFIG_FAIL_MAKE_REQUEST) and runtime configuration kernel module
   (CONFIG_SHOULD_FAIL_KNOBS)

2. build and boot with this kernel

3. modprobe should_fail_knobs

4. configure the behavior of the fault-injection capabilities via debugfs

For example, for kmalloc failures:

/debug/failslab/probability

	specifies how often it should fail in percent.

/debug/failslab/interval

	specifies the interval of failures.

/debug/failslab/times

	specifies how many times failures may happen at most.

/debug/failslab/space

	specifies the size of free space where memory can be allocated
	safely in bytes.

5. see what really happens.

The idea is taken from failmalloc (http://www.nongnu.org/failmalloc/).
Andrew Morton gave me interesting suggestions.

--

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [patch 1/5] fail-injection library
  2006-08-23 11:32 [patch 0/5] RFC: fault-injection capabilities Akinobu Mita
@ 2006-08-23 11:32 ` Akinobu Mita
  2006-08-23 12:09   ` Andi Kleen
  2006-08-23 11:32 ` [patch 2/5] fail-injection capability for kmalloc Akinobu Mita
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 20+ messages in thread
From: Akinobu Mita @ 2006-08-23 11:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, okuji, Akinobu Mita

[-- Attachment #1: should-fail.patch --]
[-- Type: text/plain, Size: 4186 bytes --]

This patch provides several functions for implementing fail-injection
capabilities.

Signed-off-by: Akinobu Mita <mita@miraclelinux.com>

 include/linux/should_fail.h |   44 ++++++++++++++++++++++++++++
 lib/Kconfig.debug           |    4 ++
 lib/Makefile                |    1 
 lib/should_fail.c           |   69 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 118 insertions(+)

Index: work-failmalloc/include/linux/should_fail.h
===================================================================
--- /dev/null
+++ work-failmalloc/include/linux/should_fail.h
@@ -0,0 +1,44 @@
+#ifndef _LINUX_SHOULD_FAIL_H
+#define _LINUX_SHOULD_FAIL_H
+
+#ifdef CONFIG_SHOULD_FAIL
+
+#include <linux/types.h>
+#include <asm/atomic.h>
+
+struct should_fail_data {
+
+	/* how often it should fail in percent. */
+	unsigned long probability;
+
+	/* the interval of failures. */
+	unsigned long interval;
+
+	/*
+	 * how many times failures may happen at most.
+	 * A value of '-1' means infinity.
+	 */
+	atomic_t times;
+
+	/*
+	 * the size of free space where memory can be allocated safely.
+	 * A value of '0' means infinity.
+	 */
+	atomic_t space;
+
+	unsigned long count;
+};
+
+#define DEFINE_SHOULD_FAIL(name) \
+	struct should_fail_data name = { .times = ATOMIC_INIT(-1), }
+
+int should_fail(struct should_fail_data *data, unsigned long size);
+int setup_should_fail(struct should_fail_data *data, char *str);
+
+#else
+
+#define should_fail(data, size)	(0)
+
+#endif /* CONFIG_SHOULD_FAIL */
+
+#endif /* _LINUX_SHOULD_FAIL_H */
Index: work-failmalloc/lib/should_fail.c
===================================================================
--- /dev/null
+++ work-failmalloc/lib/should_fail.c
@@ -0,0 +1,69 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/random.h>
+#include <linux/stat.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/should_fail.h>
+
+int setup_should_fail(struct should_fail_data *data, char *str)
+{
+	unsigned long probability;
+	unsigned long interval;
+	int times;
+	int space;
+
+	/* "<probability>,<interval>,<times>,<space>" */
+	if (sscanf(str, "%lu,%lu,%d,%d", &probability, &interval, &times,
+		   &space) < 4)
+		return 0;
+
+	data->probability = probability;
+	data->interval = interval;
+	atomic_set(&data->times, times);
+	atomic_set(&data->space, space);
+
+	return 1;
+}
+
+#define failure_probability(data)	(data)->probability
+#define failure_interval(data)		(data)->interval
+#define max_failures(data)		(data)->times
+#define current_space(data)		(data)->space
+#define atomic_dec_not_zero(v)		atomic_add_unless((v), -1, 0)
+
+/*
+ * Decide whether an operation of 'size' units should be made to fail,
+ * based on the space, interval, probability and times settings in 'data'.
+ * Returns 1 to inject a failure, 0 to let the operation proceed.
+ * Derived from failmalloc-1.0: http://www.nongnu.org/failmalloc/
+ */
+int should_fail(struct should_fail_data *data, unsigned long size)
+{
+	if (atomic_read(&max_failures(data)) == 0)
+		return 0;
+
+	if (atomic_read(&current_space(data)) > size) {
+		atomic_sub(size, &current_space(data));
+		return 0;
+	}
+
+	if (failure_interval(data) > 1) {
+		data->count++;
+		if (data->count % failure_interval(data))
+			return 0;
+	}
+
+	/*
+	 * get_random_int() is unsigned and can exceed INT_MAX, so scaling by
+	 * INT_MAX / 100 fired less often than asked; compare percents instead.
+	 */
+	if (failure_probability(data) <= get_random_int() % 100)
+		return 0;
+
+	if (atomic_read(&max_failures(data)) != -1)
+		atomic_dec_not_zero(&max_failures(data));
+
+	return 1;
+}
Index: work-failmalloc/lib/Kconfig.debug
===================================================================
--- work-failmalloc.orig/lib/Kconfig.debug
+++ work-failmalloc/lib/Kconfig.debug
@@ -368,3 +368,7 @@ config RCU_TORTURE_TEST
 	  at boot time (you probably don't).
 	  Say M if you want the RCU torture tests to build as a module.
 	  Say N if you are unsure.
+
+config SHOULD_FAIL
+	bool
+
Index: work-failmalloc/lib/Makefile
===================================================================
--- work-failmalloc.orig/lib/Makefile
+++ work-failmalloc/lib/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
 obj-$(CONFIG_SMP) += percpu_counter.o
 
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
+obj-$(CONFIG_SHOULD_FAIL) += should_fail.o
 
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h

--

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [patch 2/5] fail-injection capability for kmalloc.
  2006-08-23 11:32 [patch 0/5] RFC: fault-injection capabilities Akinobu Mita
  2006-08-23 11:32 ` [patch 1/5] fail-injection library Akinobu Mita
@ 2006-08-23 11:32 ` Akinobu Mita
  2006-08-23 11:32 ` [patch 3/5] fail-injection capability for alloc_pages() Akinobu Mita
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 20+ messages in thread
From: Akinobu Mita @ 2006-08-23 11:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, okuji, Pekka Enberg, Akinobu Mita

[-- Attachment #1: failslab.patch --]
[-- Type: text/plain, Size: 2945 bytes --]

This patch provides fail-injection capability for kmalloc.

Boot option:

	failslab=<probability>,<interval>,<times>,<space>

	<probability>

		specifies how often it should fail in percent.

	<interval>

		specifies the interval of failures.

	<times>

		specifies how many times failures may happen at most.

	<space>

		specifies the size of free space where memory can be allocated
		safely in bytes.

Example:

	failslab=100,10,-1,0

slab allocation (kmalloc, kmem_cache_alloc,..) fails once per 10 times.

Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Akinobu Mita <mita@miraclelinux.com>

 include/linux/should_fail.h |    4 ++++
 lib/Kconfig.debug           |    7 +++++++
 mm/slab.c                   |   20 ++++++++++++++++++++
 3 files changed, 31 insertions(+)

Index: work-failmalloc/mm/slab.c
===================================================================
--- work-failmalloc.orig/mm/slab.c
+++ work-failmalloc/mm/slab.c
@@ -108,6 +108,7 @@
 #include	<linux/mempolicy.h>
 #include	<linux/mutex.h>
 #include	<linux/rtmutex.h>
+#include	<linux/should_fail.h>
 
 #include	<asm/uaccess.h>
 #include	<asm/cacheflush.h>
@@ -2963,11 +2964,30 @@ static void *cache_alloc_debugcheck_afte
 #define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
 #endif
 
+#ifdef CONFIG_FAILSLAB
+
+static DEFINE_SHOULD_FAIL(failslab_data);
+
+static int __init setup_failslab(char *str)
+{
+	return setup_should_fail(&failslab_data, str);
+}
+__setup("failslab=", setup_failslab);
+
+struct should_fail_data *failslab = &failslab_data;
+EXPORT_SYMBOL_GPL(failslab);
+
+#endif
+
 static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
 	void *objp;
 	struct array_cache *ac;
 
+	if (!(flags & __GFP_NOFAIL) && cachep != &cache_cache &&
+	    should_fail(failslab, obj_size(cachep)))
+		return NULL;
+
 #ifdef CONFIG_NUMA
 	if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
 		objp = alternate_node_alloc(cachep, flags);
Index: work-failmalloc/lib/Kconfig.debug
===================================================================
--- work-failmalloc.orig/lib/Kconfig.debug
+++ work-failmalloc/lib/Kconfig.debug
@@ -372,3 +372,10 @@ config RCU_TORTURE_TEST
 config SHOULD_FAIL
 	bool
 
+config FAILSLAB
+	bool "fault-injection capabilitiy for kmalloc"
+	depends on DEBUG_KERNEL
+	select SHOULD_FAIL
+	help
+	  This option provides fault-injection capabilitiy for kmalloc.
+
Index: work-failmalloc/include/linux/should_fail.h
===================================================================
--- work-failmalloc.orig/include/linux/should_fail.h
+++ work-failmalloc/include/linux/should_fail.h
@@ -35,6 +35,10 @@ struct should_fail_data {
 int should_fail(struct should_fail_data *data, unsigned long size);
 int setup_should_fail(struct should_fail_data *data, char *str);
 
+#ifdef CONFIG_FAILSLAB
+extern struct should_fail_data *failslab;
+#endif
+
 #else
 
 #define should_fail(data, size)	(0)

--

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [patch 3/5] fail-injection capability for alloc_pages()
  2006-08-23 11:32 [patch 0/5] RFC: fault-injection capabilities Akinobu Mita
  2006-08-23 11:32 ` [patch 1/5] fail-injection library Akinobu Mita
  2006-08-23 11:32 ` [patch 2/5] fail-injection capability for kmalloc Akinobu Mita
@ 2006-08-23 11:32 ` Akinobu Mita
  2006-08-23 11:32 ` [patch 4/5] fail-injection capability for disk IO Akinobu Mita
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 20+ messages in thread
From: Akinobu Mita @ 2006-08-23 11:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, okuji, Akinobu Mita

[-- Attachment #1: fail_alloc_pages.patch --]
[-- Type: text/plain, Size: 2881 bytes --]

This patch provides fail-injection capability for alloc_pages()

boot option:

	fail_page_alloc=<probability>,<interval>,<times>,<space>

	<probability>

		specifies how often it should fail in percent.

	<interval>

		specifies the interval of failures.

	<times>

		specifies how many times failures may happen at most.

	<space>

		specifies the size of free space where memory can be allocated
		safely in pages.

Example:

	fail_page_alloc=100,10,-1,0

page allocation fails once per 10 times.

Signed-off-by: Akinobu Mita <mita@miraclelinux.com>

 include/linux/should_fail.h |    4 ++++
 lib/Kconfig.debug           |    7 +++++++
 mm/page_alloc.c             |   20 ++++++++++++++++++++
 3 files changed, 31 insertions(+)

Index: work-failmalloc/lib/Kconfig.debug
===================================================================
--- work-failmalloc.orig/lib/Kconfig.debug
+++ work-failmalloc/lib/Kconfig.debug
@@ -379,3 +379,10 @@ config FAILSLAB
 	help
 	  This option provides fault-injection capabilitiy for kmalloc.
 
+config FAIL_PAGE_ALLOC
+	bool "fault-injection capabilitiy for alloc_pages()"
+	depends on DEBUG_KERNEL
+	select SHOULD_FAIL
+	help
+	  This option provides fault-injection capabilitiy for alloc_pages().
+
Index: work-failmalloc/mm/page_alloc.c
===================================================================
--- work-failmalloc.orig/mm/page_alloc.c
+++ work-failmalloc/mm/page_alloc.c
@@ -37,6 +37,7 @@
 #include <linux/vmalloc.h>
 #include <linux/mempolicy.h>
 #include <linux/stop_machine.h>
+#include <linux/should_fail.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -903,6 +904,21 @@ get_page_from_freelist(gfp_t gfp_mask, u
 	return page;
 }
 
+#ifdef CONFIG_FAIL_PAGE_ALLOC
+
+static DEFINE_SHOULD_FAIL(fail_page_alloc_data);
+
+static int __init setup_fail_page_alloc(char *str)
+{
+	return setup_should_fail(&fail_page_alloc_data, str);
+}
+__setup("fail_page_alloc=", setup_fail_page_alloc);
+
+struct should_fail_data *fail_page_alloc = &fail_page_alloc_data;
+EXPORT_SYMBOL_GPL(fail_page_alloc);
+
+#endif
+
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
@@ -921,6 +937,10 @@ __alloc_pages(gfp_t gfp_mask, unsigned i
 
 	might_sleep_if(wait);
 
+	if (!(gfp_mask & __GFP_NOFAIL) &&
+	    should_fail(fail_page_alloc, 1 << order))
+		return NULL;
+
 restart:
 	z = zonelist->zones;  /* the list of zones suitable for gfp_mask */
 
Index: work-failmalloc/include/linux/should_fail.h
===================================================================
--- work-failmalloc.orig/include/linux/should_fail.h
+++ work-failmalloc/include/linux/should_fail.h
@@ -39,6 +39,10 @@ int setup_should_fail(struct should_fail
 extern struct should_fail_data *failslab;
 #endif
 
+#ifdef CONFIG_FAIL_PAGE_ALLOC
+extern struct should_fail_data *fail_page_alloc;
+#endif
+
 #else
 
 #define should_fail(data, size)	(0)

--

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [patch 4/5] fail-injection capability for disk IO
  2006-08-23 11:32 [patch 0/5] RFC: fault-injection capabilities Akinobu Mita
                   ` (2 preceding siblings ...)
  2006-08-23 11:32 ` [patch 3/5] fail-injection capability for alloc_pages() Akinobu Mita
@ 2006-08-23 11:32 ` Akinobu Mita
  2006-08-23 12:03   ` Jens Axboe
  2006-08-23 12:07   ` Andi Kleen
  2006-08-23 11:32 ` [patch 5/5] debugfs entries for configuration Akinobu Mita
                   ` (3 subsequent siblings)
  7 siblings, 2 replies; 20+ messages in thread
From: Akinobu Mita @ 2006-08-23 11:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, okuji, Jens Axboe, Akinobu Mita

[-- Attachment #1: fail_make_request.patch --]
[-- Type: text/plain, Size: 3049 bytes --]

This patch provides fail-injection capability for disk IO.

Boot option:

	fail_make_request=<probability>,<interval>,<times>,<space>

	<probability>

		specifies how often it should fail in percent.

	<interval>

		specifies the interval of failures.

	<times>

		specifies how many times failures may happen at most.

	<space>

		specifies the size of free space where disk IO can be issued
		safely in bytes.

Example:

	fail_make_request=100,10,-1,0

generic_make_request() fails once per 10 times.

Cc: Jens Axboe <axboe@suse.de>
Signed-off-by: Akinobu Mita <mita@miraclelinux.com>

 block/ll_rw_blk.c           |   19 +++++++++++++++++++
 include/linux/should_fail.h |    4 ++++
 lib/Kconfig.debug           |    7 +++++++
 3 files changed, 30 insertions(+)

Index: work-failmalloc/block/ll_rw_blk.c
===================================================================
--- work-failmalloc.orig/block/ll_rw_blk.c
+++ work-failmalloc/block/ll_rw_blk.c
@@ -28,6 +28,7 @@
 #include <linux/interrupt.h>
 #include <linux/cpu.h>
 #include <linux/blktrace_api.h>
+#include <linux/should_fail.h>
 
 /*
  * for max sense size
@@ -2993,6 +2994,21 @@ static void handle_bad_sector(struct bio
 	set_bit(BIO_EOF, &bio->bi_flags);
 }
 
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+
+static DEFINE_SHOULD_FAIL(fail_make_request_data);
+
+static int __init setup_fail_make_request(char *str)
+{
+	return setup_should_fail(&fail_make_request_data, str);
+}
+__setup("fail_make_request=", setup_fail_make_request);
+
+struct should_fail_data *fail_make_request = &fail_make_request_data;
+EXPORT_SYMBOL_GPL(fail_make_request);
+
+#endif
+
 /**
  * generic_make_request: hand a buffer to its device driver for I/O
  * @bio:  The bio describing the location in memory and on the device.
@@ -3077,6 +3093,9 @@ end_io:
 		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
 			goto end_io;
 
+		if (should_fail(fail_make_request, bio->bi_size))
+			goto end_io;
+
 		/*
 		 * If this device has partitions, remap block n
 		 * of partition p to block n+start(p) of the disk.
Index: work-failmalloc/lib/Kconfig.debug
===================================================================
--- work-failmalloc.orig/lib/Kconfig.debug
+++ work-failmalloc/lib/Kconfig.debug
@@ -386,3 +386,10 @@ config FAIL_PAGE_ALLOC
 	help
 	  This option provides fault-injection capabilitiy for alloc_pages().
 
+config FAIL_MAKE_REQUEST
+	bool "fault-injection capabilitiy for disk IO"
+	depends on DEBUG_KERNEL
+	select SHOULD_FAIL
+	help
+	  This option provides fault-injection capabilitiy to disk IO.
+
Index: work-failmalloc/include/linux/should_fail.h
===================================================================
--- work-failmalloc.orig/include/linux/should_fail.h
+++ work-failmalloc/include/linux/should_fail.h
@@ -43,6 +43,10 @@ extern struct should_fail_data *failslab
 extern struct should_fail_data *fail_page_alloc;
 #endif
 
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+extern struct should_fail_data *fail_make_request;
+#endif
+
 #else
 
 #define should_fail(data, size)	(0)

--

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [patch 5/5] debugfs entries for configuration
  2006-08-23 11:32 [patch 0/5] RFC: fault-injection capabilities Akinobu Mita
                   ` (3 preceding siblings ...)
  2006-08-23 11:32 ` [patch 4/5] fail-injection capability for disk IO Akinobu Mita
@ 2006-08-23 11:32 ` Akinobu Mita
  2006-08-23 12:06 ` [patch 0/5] RFC: fault-injection capabilities Andi Kleen
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 20+ messages in thread
From: Akinobu Mita @ 2006-08-23 11:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, okuji, Akinobu Mita

[-- Attachment #1: knobs.patch --]
[-- Type: text/plain, Size: 5778 bytes --]

This kernel module provides debugfs entries for configuring the
fault-injection capabilities.

/debug/
|-- fail_make_request
|   |-- interval
|   |-- probability
|   |-- space
|   `-- times
|-- fail_page_alloc
|   |-- interval
|   |-- probability
|   |-- space
|   `-- times
`-- failslab
    |-- interval
    |-- probability
    |-- space
    `-- times

Signed-off-by: Akinobu Mita <mita@miraclelinux.com>

 lib/Kconfig.debug       |    8 ++
 lib/Makefile            |    1 
 lib/should_fail_knobs.c |  168 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 177 insertions(+)

Index: work-failmalloc/lib/should_fail_knobs.c
===================================================================
--- /dev/null
+++ work-failmalloc/lib/should_fail_knobs.c
@@ -0,0 +1,168 @@
+#include <linux/module.h>
+#include <linux/should_fail.h>
+#include <linux/debugfs.h>
+
+struct should_fail_knobs {
+	struct dentry *dir;
+	struct dentry *probability_file;
+	struct dentry *interval_file;
+	struct dentry *times_file;
+	struct dentry *space_file;
+};
+
+static void debugfs_ul_set(void *data, u64 val)
+{
+	*(unsigned long *)data = val;
+}
+
+static u64 debugfs_ul_get(void *data)
+{
+	return *(unsigned long *)data;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_ul, debugfs_ul_get, debugfs_ul_set, "%llu\n");
+
+static struct dentry *debugfs_create_ul(const char *name, mode_t mode,
+				struct dentry *parent, unsigned long *value)
+{
+	return debugfs_create_file(name, mode, parent, value, &fops_ul);
+}
+
+static void debugfs_atomic_t_set(void *data, u64 val)
+{
+	atomic_set((atomic_t *)data, val);
+}
+
+static u64 debugfs_atomic_t_get(void *data)
+{
+	return atomic_read((atomic_t *)data);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get,
+			debugfs_atomic_t_set, "%lld\n");
+
+static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode,
+				struct dentry *parent, atomic_t *value)
+{
+	return debugfs_create_file(name, mode, parent, value, &fops_atomic_t);
+}
+
+static void cleanup_should_fail_knobs(struct should_fail_knobs *knobs)
+{
+	if (knobs->dir) {
+		if (knobs->probability_file) {
+			debugfs_remove(knobs->probability_file);
+			knobs->probability_file = NULL;
+		}
+		if (knobs->interval_file) {
+			debugfs_remove(knobs->interval_file);
+			knobs->interval_file = NULL;
+		}
+		if (knobs->times_file) {
+			debugfs_remove(knobs->times_file);
+			knobs->times_file = NULL;
+		}
+		if (knobs->space_file) {
+			debugfs_remove(knobs->space_file);
+			knobs->space_file = NULL;
+		}
+		debugfs_remove(knobs->dir);
+		knobs->dir = NULL;
+	}
+}
+
+static int init_should_fail_knobs(struct should_fail_knobs *knobs,
+			   struct should_fail_data *data, const char *name)
+{
+	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+	struct dentry *dir;
+	struct dentry *file;
+
+	memset(knobs, 0, sizeof(*knobs));
+
+	dir = debugfs_create_dir(name, NULL);
+	if (!dir)
+		goto fail;
+	knobs->dir = dir;
+
+	file = debugfs_create_ul("probability", mode, dir, &data->probability);
+	if (!file)
+		goto fail;
+	knobs->probability_file = file;
+
+	file = debugfs_create_ul("interval", mode, dir, &data->interval);
+	if (!file)
+		goto fail;
+	knobs->interval_file = file;
+
+	file = debugfs_create_atomic_t("times", mode, dir, &data->times);
+	if (!file)
+		goto fail;
+	knobs->times_file = file;
+
+	file = debugfs_create_atomic_t("space", mode, dir, &data->space);
+	if (!file)
+		goto fail;
+	knobs->space_file = file;
+
+	return 0;
+fail:
+	cleanup_should_fail_knobs(knobs);
+	return -ENOMEM;
+}
+
+#ifdef CONFIG_FAILSLAB
+static struct should_fail_knobs failslab_knobs;
+#endif
+#ifdef CONFIG_FAIL_PAGE_ALLOC
+static struct should_fail_knobs fail_page_alloc_knobs;
+#endif
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+static struct should_fail_knobs fail_make_request_knobs;
+#endif
+/* Remove the debugfs entries of every enabled fault-injection capability. */
+static void cleanup_knobs(void)
+{
+#ifdef CONFIG_FAILSLAB
+	cleanup_should_fail_knobs(&failslab_knobs);
+#endif
+#ifdef CONFIG_FAIL_PAGE_ALLOC
+	cleanup_should_fail_knobs(&fail_page_alloc_knobs);
+#endif
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+	cleanup_should_fail_knobs(&fail_make_request_knobs);
+#endif
+}
+
+static int init_knobs(void)
+{
+	int err;
+
+#ifdef CONFIG_FAILSLAB
+	err = init_should_fail_knobs(&failslab_knobs, failslab, "failslab");
+	if (err)
+		goto fail;
+#endif
+#ifdef CONFIG_FAIL_PAGE_ALLOC
+	err = init_should_fail_knobs(&fail_page_alloc_knobs, fail_page_alloc,
+				     "fail_page_alloc");
+	if (err)
+		goto fail;
+#endif
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+	err = init_should_fail_knobs(&fail_make_request_knobs,
+				     fail_make_request, "fail_make_request");
+	if (err)
+		goto fail;
+#endif
+
+	return 0;
+fail:
+	cleanup_knobs();
+
+	return err;
+}
+
+module_init(init_knobs);
+module_exit(cleanup_knobs);
+MODULE_LICENSE("GPL");
Index: work-failmalloc/lib/Kconfig.debug
===================================================================
--- work-failmalloc.orig/lib/Kconfig.debug
+++ work-failmalloc/lib/Kconfig.debug
@@ -393,3 +393,11 @@ config FAIL_MAKE_REQUEST
 	help
 	  This option provides fault-injection capabilitiy to disk IO.
 
+config SHOULD_FAIL_KNOBS
+	tristate "runtime configuration for fault-injection capabilities"
+	depends on DEBUG_KERNEL && SYSFS && SHOULD_FAIL
+	select DEBUG_FS
+	help
+	  This option provides kernel module that provides runtime
+	  configuration interface by debugfs.
+
Index: work-failmalloc/lib/Makefile
===================================================================
--- work-failmalloc.orig/lib/Makefile
+++ work-failmalloc/lib/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_SMP) += percpu_counter.o
 
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_SHOULD_FAIL) += should_fail.o
+obj-$(CONFIG_SHOULD_FAIL_KNOBS) += should_fail_knobs.o
 
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h

--

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 4/5] fail-injection capability for disk IO
  2006-08-23 11:32 ` [patch 4/5] fail-injection capability for disk IO Akinobu Mita
@ 2006-08-23 12:03   ` Jens Axboe
  2006-08-23 17:27     ` Andrew Morton
  2006-08-23 12:07   ` Andi Kleen
  1 sibling, 1 reply; 20+ messages in thread
From: Jens Axboe @ 2006-08-23 12:03 UTC (permalink / raw)
  To: Akinobu Mita; +Cc: linux-kernel, akpm, okuji

On Wed, Aug 23 2006, Akinobu Mita wrote:
> This patch provides fail-injection capability for disk IO.
> 
> Boot option:
> 
> 	fail_make_request=<probability>,<interval>,<times>,<space>
> 
> 	<probability>
> 
> 		specifies how often it should fail in percent.
> 
> 	<interval>
> 
> 		specifies the interval of failures.
> 
> 	<times>
> 
> 		specifies how many times failures may happen at most.
> 
> 	<space>
> 
> 		specifies the size of free space where disk IO can be issued
> 		safely in bytes.
> 
> Example:
> 
> 	fail_make_request=100,10,-1,0
> 
> generic_make_request() fails once per 10 times.

Hmm dunno, seems a pretty useless feature to me. Wouldn't it make a lot
more sense to do this per-queue instead of a global entity?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 0/5] RFC: fault-injection capabilities
  2006-08-23 11:32 [patch 0/5] RFC: fault-injection capabilities Akinobu Mita
                   ` (4 preceding siblings ...)
  2006-08-23 11:32 ` [patch 5/5] debugfs entries for configuration Akinobu Mita
@ 2006-08-23 12:06 ` Andi Kleen
  2006-08-23 14:18 ` Alexey Dobriyan
  2006-08-24 18:41 ` Valdis.Kletnieks
  7 siblings, 0 replies; 20+ messages in thread
From: Andi Kleen @ 2006-08-23 12:06 UTC (permalink / raw)
  To: Akinobu Mita; +Cc: akpm, okuji, linux-kernel

Akinobu Mita <mita@miraclelinux.com> writes:

> This patch set provides some fault-injection capabilities.
> 
> - kmalloc failures
> 
> - alloc_pages() failures
> 
> - disk IO errors
> 
> We can see what really happens if those failures happen.

Nice.

The SUSE kernel has a crasher module that is also quite useful for testing.
What it does basically is to always allocate/free memory and overwrite
the memory and check if the memory hasn't been changed by someone else.
Perhaps something like that could be incorporated into your framework too?

I put a copy of the suse patch in 
http://www.firstfloor.org/~andi/crasher-26.diff


> 
> In order to enable these fault-injection capabilities:

However I'm not sure they're too useful right now.  The problem is
that they're too global and might render the system unusable. Have you
considered adding some more filters, like uid/gid to fail only (
I think that would be useful because then it would be possible
to run test suites with faults while keeping other parts of the system
functional) or maybe even a list of callers to test? e.g. only
failing for module foo would be nice.

-Andi


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 4/5] fail-injection capability for disk IO
  2006-08-23 11:32 ` [patch 4/5] fail-injection capability for disk IO Akinobu Mita
  2006-08-23 12:03   ` Jens Axboe
@ 2006-08-23 12:07   ` Andi Kleen
  2006-08-23 12:10     ` Jens Axboe
  1 sibling, 1 reply; 20+ messages in thread
From: Andi Kleen @ 2006-08-23 12:07 UTC (permalink / raw)
  To: Akinobu Mita; +Cc: akpm, okuji, Jens Axboe, linux-kernel

Akinobu Mita <mita@miraclelinux.com> writes:
>   * @bio:  The bio describing the location in memory and on the device.
> @@ -3077,6 +3093,9 @@ end_io:
>  		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
>  			goto end_io;
>  
> +		if (should_fail(fail_make_request, bio->bi_size))
> +			goto end_io;

AFAIK it is reasonably easy to write stacking block drivers.
I think I would prefer a stackable driver instead of this hook.

-Andi

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 1/5] fail-injection library
  2006-08-23 11:32 ` [patch 1/5] fail-injection library Akinobu Mita
@ 2006-08-23 12:09   ` Andi Kleen
  0 siblings, 0 replies; 20+ messages in thread
From: Andi Kleen @ 2006-08-23 12:09 UTC (permalink / raw)
  To: Akinobu Mita; +Cc: akpm, okuji, linux-kernel

Akinobu Mita <mita@miraclelinux.com> writes:
> +	if (failure_probability(data) == 100 ||
> +	    INT_MAX / 100 * failure_probability(data) > get_random_int())

I don't think it's a good idea to use get_random_int here. It's a secure
quite heavyweight random simulator that eats up precious entropy.

I would use something simple with an option for the user to specify the seed
(default jiffies maybe) for reproducibility.

e.g. the perfmon patchkit that was just posted had a reasonable simple
pseudo RND for lib/. Maybe you can reuse that.

-Andi

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 4/5] fail-injection capability for disk IO
  2006-08-23 12:07   ` Andi Kleen
@ 2006-08-23 12:10     ` Jens Axboe
  2006-08-23 19:34       ` Mario 'BitKoenig' Holbe
  0 siblings, 1 reply; 20+ messages in thread
From: Jens Axboe @ 2006-08-23 12:10 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Akinobu Mita, akpm, okuji, linux-kernel

On Wed, Aug 23 2006, Andi Kleen wrote:
> Akinobu Mita <mita@miraclelinux.com> writes:
> >   * @bio:  The bio describing the location in memory and on the device.
> > @@ -3077,6 +3093,9 @@ end_io:
> >  		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
> >  			goto end_io;
> >  
> > +		if (should_fail(fail_make_request, bio->bi_size))
> > +			goto end_io;
> 
> AFAIK it is reasonably easy to write stacking block drivers.
> I think I would prefer a stackable driver instead of this hook.

But that makes it more tricky to setup a test, since you have to change
from using /dev/sda (for example) to /dev/stacked-driver.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 0/5] RFC: fault-injection capabilities
  2006-08-23 11:32 [patch 0/5] RFC: fault-injection capabilities Akinobu Mita
                   ` (5 preceding siblings ...)
  2006-08-23 12:06 ` [patch 0/5] RFC: fault-injection capabilities Andi Kleen
@ 2006-08-23 14:18 ` Alexey Dobriyan
  2006-08-24 18:41 ` Valdis.Kletnieks
  7 siblings, 0 replies; 20+ messages in thread
From: Alexey Dobriyan @ 2006-08-23 14:18 UTC (permalink / raw)
  To: Akinobu Mita; +Cc: linux-kernel, akpm, okuji

On Wed, Aug 23, 2006 at 08:32:43PM +0900, Akinobu Mita wrote:
> This patch set provides some fault-injection capabilities.
>
> - kmalloc failures
>
> - alloc_pages() failures
>
> - disk IO errors
>
> We can see what really happens if those failures happen.

What bugs fault-injection has already found? Ingo and Sons fixed quite
a few, _before_ lockdep was merged.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 4/5] fail-injection capability for disk IO
  2006-08-23 12:03   ` Jens Axboe
@ 2006-08-23 17:27     ` Andrew Morton
  2006-08-23 18:01       ` Jens Axboe
  2006-08-23 18:22       ` Hans Reiser
  0 siblings, 2 replies; 20+ messages in thread
From: Andrew Morton @ 2006-08-23 17:27 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Akinobu Mita, linux-kernel, okuji

On Wed, 23 Aug 2006 14:03:55 +0200
Jens Axboe <axboe@suse.de> wrote:

> On Wed, Aug 23 2006, Akinobu Mita wrote:
> > This patch provides fail-injection capability for disk IO.
> > 
> > Boot option:
> > 
> > 	fail_make_request=<probability>,<interval>,<times>,<space>
> > 
> > 	<probability>
> > 
> > 		specifies how often it should fail in percent.
> > 
> > 	<interval>
> > 
> > 		specifies the interval of failures.
> > 
> > 	<times>
> > 
> > 		specifies how many times failures may happen at most.
> > 
> > 	<space>
> > 
> > 		specifies the size of free space where disk IO can be issued
> > 		safely in bytes.
> > 
> > Example:
> > 
> > 	fail_make_request=100,10,-1,0
> > 
> > generic_make_request() fails once per 10 times.
> 
> Hmm dunno, seems a pretty useless feature to me.

We need it.  What is the FS/VFS/VM behaviour in the presence of IO
errors?  Nobody knows, because we rarely test it.  Those few times where
people _do_ test it (the hard way), bad things tend to happen.  reiserfs
(for example) likes to go wobble, wobble, wobble, BUG.

> Wouldn't it make a lot
> more sense to do this per-queue instead of a global entity?

Yes, I think so.  /sys/block/sda/sda2/make-it-fail.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 4/5] fail-injection capability for disk IO
  2006-08-23 17:27     ` Andrew Morton
@ 2006-08-23 18:01       ` Jens Axboe
  2006-08-23 18:16         ` Ric Wheeler
  2006-08-23 18:22       ` Hans Reiser
  1 sibling, 1 reply; 20+ messages in thread
From: Jens Axboe @ 2006-08-23 18:01 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Akinobu Mita, linux-kernel, okuji

On Wed, Aug 23 2006, Andrew Morton wrote:
> On Wed, 23 Aug 2006 14:03:55 +0200
> Jens Axboe <axboe@suse.de> wrote:
> 
> > On Wed, Aug 23 2006, Akinobu Mita wrote:
> > > This patch provides fail-injection capability for disk IO.
> > > 
> > > Boot option:
> > > 
> > > 	fail_make_request=<probability>,<interval>,<times>,<space>
> > > 
> > > 	<probability>
> > > 
> > > 		specifies how often it should fail in percent.
> > > 
> > > 	<interval>
> > > 
> > > 		specifies the interval of failures.
> > > 
> > > 	<times>
> > > 
> > > 		specifies how many times failures may happen at most.
> > > 
> > > 	<space>
> > > 
> > > 		specifies the size of free space where disk IO can be issued
> > > 		safely in bytes.
> > > 
> > > Example:
> > > 
> > > 	fail_make_request=100,10,-1,0
> > > 
> > > generic_make_request() fails once per 10 times.
> > 
> > Hmm dunno, seems a pretty useless feature to me.
> 
> We need it.  What is the FS/VFS/VM behaviour in the presence of IO
> errors?  Nobody knows, because we rarely test it.  Those few times where
> people _do_ test it (the hard way), bad things tend to happen.  reiserfs
> (for example) likes to go wobble, wobble, wobble, BUG.

You misunderstood me - a global parameter is useless, as it makes it
pretty impossible for people to use this for any sort of testing (unless
it's very specialized). I didn't say a feature to test io errors was
useless!

> > Wouldn't it make a lot
> > more sense to do this per-queue instead of a global entity?
> 
> Yes, I think so.  /sys/block/sda/sda2/make-it-fail.

Precisely.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 4/5] fail-injection capability for disk IO
  2006-08-23 18:01       ` Jens Axboe
@ 2006-08-23 18:16         ` Ric Wheeler
  2006-08-23 18:26           ` Jens Axboe
  0 siblings, 1 reply; 20+ messages in thread
From: Ric Wheeler @ 2006-08-23 18:16 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Andrew Morton, Akinobu Mita, linux-kernel, okuji

Jens Axboe wrote:
> On Wed, Aug 23 2006, Andrew Morton wrote:
> 
>>On Wed, 23 Aug 2006 14:03:55 +0200
>>Jens Axboe <axboe@suse.de> wrote:
>>
>>
>>>On Wed, Aug 23 2006, Akinobu Mita wrote:
>>>
>>>>This patch provides fail-injection capability for disk IO.
>>>>
>>>>Boot option:
>>>>
>>>>	fail_make_request=<probability>,<interval>,<times>,<space>
>>>>
>>>>	<probability>
>>>>
>>>>		specifies how often it should fail in percent.
>>>>
>>>>	<interval>
>>>>
>>>>		specifies the interval of failures.
>>>>
>>>>	<times>
>>>>
>>>>		specifies how many times failures may happen at most.
>>>>
>>>>	<space>
>>>>
>>>>		specifies the size of free space where disk IO can be issued
>>>>		safely in bytes.
>>>>
>>>>Example:
>>>>
>>>>	fail_make_request=100,10,-1,0
>>>>
>>>>generic_make_request() fails once per 10 times.
>>>
>>>Hmm dunno, seems a pretty useless feature to me.
>>
>>We need it.  What is the FS/VFS/VM behaviour in the presence of IO
>>errors?  Nobody knows, because we rarely test it.  Those few times where
>>people _do_ test it (the hard way), bad things tend to happen.  reiserfs
>>(for example) likes to go wobble, wobble, wobble, BUG.
> 
> 
> You misunderstood me - a global parameter is useless, as it makes it
> pretty impossible for people to use this for any sort of testing (unless
> it's very specialized). I didn't say a feature to test io errors was
> useless!
> 
> 
>>>Wouldn't it make a lot
>>>more sense to do this per-queue instead of a global entity?
>>
>>Yes, I think so.  /sys/block/sda/sda2/make-it-fail.
> 
> 
> Precisely.
> 

I think that this is very useful for testing file systems.

What this will miss is the error path through the lower levels of the IO 
path (i.e., the libata/SCSI error handling confusion that Mark Lord has 
been working on patches for would need some error injection at or below 
the libata level).

We currently test this whole path with either weird fault injection gear 
to hit the s-ata bus or the old-fashioned pile of moderately flaky disks 
that we try hard not to fix or totally kill.

It would be really useful to get something (target mode SW disk? libata 
or other low level error injection?) to test this whole path in software...

ric



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 4/5] fail-injection capability for disk IO
  2006-08-23 17:27     ` Andrew Morton
  2006-08-23 18:01       ` Jens Axboe
@ 2006-08-23 18:22       ` Hans Reiser
  1 sibling, 0 replies; 20+ messages in thread
From: Hans Reiser @ 2006-08-23 18:22 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Jens Axboe, Akinobu Mita, linux-kernel, okuji

Andrew Morton wrote:
> On Wed, 23 Aug 2006 14:03:55 +0200
> Jens Axboe <axboe@suse.de> wrote:
>
>   
>> On Wed, Aug 23 2006, Akinobu Mita wrote:
>>     
>>> This patch provides fail-injection capability for disk IO.
>>>
>>> Boot option:
>>>
>>> 	fail_make_request=<probability>,<interval>,<times>,<space>
>>>
>>> 	<probability>
>>>
>>> 		specifies how often it should fail in percent.
>>>
>>> 	<interval>
>>>
>>> 		specifies the interval of failures.
>>>
>>> 	<times>
>>>
>>> 		specifies how many times failures may happen at most.
>>>
>>> 	<space>
>>>
>>> 		specifies the size of free space where disk IO can be issued
>>> 		safely in bytes.
>>>
>>> Example:
>>>
>>> 	fail_make_request=100,10,-1,0
>>>
>>> generic_make_request() fails once per 10 times.
>>>       
>> Hmm dunno, seems a pretty useless feature to me.
>>     
>
> We need it.  What is the FS/VFS/VM behaviour in the presence of IO
> errors?  Nobody knows, because we rarely test it.  Those few times where
> people _do_ test it (the hard way), bad things tend to happen.  reiserfs
> (for example) likes to go wobble, wobble, wobble, BUG.
>   
The iron folks tested it, and we did better than other FS's.  That said,
it seems like a valuable feature to me.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 4/5] fail-injection capability for disk IO
  2006-08-23 18:16         ` Ric Wheeler
@ 2006-08-23 18:26           ` Jens Axboe
  0 siblings, 0 replies; 20+ messages in thread
From: Jens Axboe @ 2006-08-23 18:26 UTC (permalink / raw)
  To: Ric Wheeler; +Cc: Andrew Morton, Akinobu Mita, linux-kernel, okuji

On Wed, Aug 23 2006, Ric Wheeler wrote:
> Jens Axboe wrote:
> >On Wed, Aug 23 2006, Andrew Morton wrote:
> >
> >>On Wed, 23 Aug 2006 14:03:55 +0200
> >>Jens Axboe <axboe@suse.de> wrote:
> >>
> >>
> >>>On Wed, Aug 23 2006, Akinobu Mita wrote:
> >>>
> >>>>This patch provides fail-injection capability for disk IO.
> >>>>
> >>>>Boot option:
> >>>>
> >>>>	fail_make_request=<probability>,<interval>,<times>,<space>
> >>>>
> >>>>	<probability>
> >>>>
> >>>>		specifies how often it should fail in percent.
> >>>>
> >>>>	<interval>
> >>>>
> >>>>		specifies the interval of failures.
> >>>>
> >>>>	<times>
> >>>>
> >>>>		specifies how many times failures may happen at most.
> >>>>
> >>>>	<space>
> >>>>
> >>>>		specifies the size of free space where disk IO can be issued
> >>>>		safely in bytes.
> >>>>
> >>>>Example:
> >>>>
> >>>>	fail_make_request=100,10,-1,0
> >>>>
> >>>>generic_make_request() fails once per 10 times.
> >>>
> >>>Hmm dunno, seems a pretty useless feature to me.
> >>
> >>We need it.  What is the FS/VFS/VM behaviour in the presence of IO
> >>errors?  Nobody knows, because we rarely test it.  Those few times where
> >>people _do_ test it (the hard way), bad things tend to happen.  reiserfs
> >>(for example) likes to go wobble, wobble, wobble, BUG.
> >
> >
> >You misunderstood me - a global parameter is useless, as it makes it
> >pretty impossible for people to use this for any sort of testing (unless
> >it's very specialized). I didn't say a feature to test io errors was
> >useless!
> >
> >
> >>>Wouldn't it make a lot
> >>>more sense to do this per-queue instead of a global entity?
> >>
> >>Yes, I think so.  /sys/block/sda/sda2/make-it-fail.
> >
> >
> >Precisely.
> >
> 
> I think that this is very useful for testing file systems.
> 
> What this will miss is the error path through the lower levels of the IO 
> path (i.e., the libata/SCSI error handling confusion that Mark Lord has 
> been working on patches for would need some error injection at or below 
> the libata level).
> 
> We currently test this whole path with either weird fault injection gear 
> to hit the s-ata bus or the old-fashioned pile of moderately flaky disks 
> that we try hard not to fix or totally kill.
> 
> It would be really useful to get something (target mode SW disk? libata 
> or other low level error injection?) to test this whole path in software...

Yes, this approach only tests the layer(s) above the device. To simulate
hardware failure or timeouts, I _think_ scsi_debug can already help you
quite a bit. If not, it should be easy enough to extend to add these
sorts of things.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 4/5] fail-injection capability for disk IO
  2006-08-23 12:10     ` Jens Axboe
@ 2006-08-23 19:34       ` Mario 'BitKoenig' Holbe
  2006-08-23 19:42         ` Ric Wheeler
  0 siblings, 1 reply; 20+ messages in thread
From: Mario 'BitKoenig' Holbe @ 2006-08-23 19:34 UTC (permalink / raw)
  To: linux-kernel

Jens Axboe <axboe@suse.de> wrote:
> On Wed, Aug 23 2006, Andi Kleen wrote:
>> I think I would prefer a stackable driver instead of this hook.

I second this, preferably a device-mapper target similar to dm-error.

> But that makes it more tricky to setup a test, since you have to change
> from using /dev/sda (for example) to /dev/stacked-driver.

Do you really think somebody would run such tests on otherwise normally
used devices?


regards
   Mario
-- 
There are two major products that come from Berkeley: LSD and UNIX.
We don't believe this to be a coincidence.    -- Jeremy S. Anderson


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 4/5] fail-injection capability for disk IO
  2006-08-23 19:34       ` Mario 'BitKoenig' Holbe
@ 2006-08-23 19:42         ` Ric Wheeler
  0 siblings, 0 replies; 20+ messages in thread
From: Ric Wheeler @ 2006-08-23 19:42 UTC (permalink / raw)
  To: Mario 'BitKoenig' Holbe; +Cc: linux-kernel

Mario 'BitKoenig' Holbe wrote:
> Jens Axboe <axboe@suse.de> wrote:
> 
>>On Wed, Aug 23 2006, Andi Kleen wrote:
>>
>>>I think I would prefer a stackable driver instead of this hook.
> 
> 
> I second this, preferably a device-mapper target similar to dm-error.
> 
> 
>>But that makes it more tricky to setup a test, since you have to change
>>from using /dev/sda (for example) to /dev/stacked-driver.
> 
> 
> Do you really think somebody would run such tests on otherwise normally
> used devices?
> 

We certainly run this kind of test on a routine basis - before we ship 
a kernel to our installed field, we need to verify that it will handle 
disk IO errors correctly.

In our case, the tests are run on a farm of machines that get pxe'ed to 
a specific image, tested (usually by sticking in a disk known to be bad 
enough to cause reliable errors ;-)) and then we watch to see that the 
errors do not cause hangs, etc.

Having a requirement to change our standard image (sda -> 
stacked-driver) would not be impossible, but would be less convenient...

ric

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [patch 0/5] RFC: fault-injection capabilities
  2006-08-23 11:32 [patch 0/5] RFC: fault-injection capabilities Akinobu Mita
                   ` (6 preceding siblings ...)
  2006-08-23 14:18 ` Alexey Dobriyan
@ 2006-08-24 18:41 ` Valdis.Kletnieks
  7 siblings, 0 replies; 20+ messages in thread
From: Valdis.Kletnieks @ 2006-08-24 18:41 UTC (permalink / raw)
  To: Akinobu Mita; +Cc: linux-kernel, akpm, okuji

[-- Attachment #1: Type: text/plain, Size: 620 bytes --]

On Wed, 23 Aug 2006 20:32:43 +0900, Akinobu Mita said:

> For example about kmalloc failures:
> 
> /debug/failslab/probability
> 
> 	specifies how often it should fail in percent.

As others have noted, the *right* semantics for this is being able to inject a
1% or higher rate in the code you're interested in, while maintaining a 0
injection rate for things outside the module under test.  Maybe a /debug/
failslab/address_start and address_end, and a userspace helper that peeks at a
System.map and injects the right values - then it's a simple compare of the
high/low addresses provided against the caller address.


[-- Attachment #2: Type: application/pgp-signature, Size: 226 bytes --]

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2006-08-24 18:42 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-08-23 11:32 [patch 0/5] RFC: fault-injection capabilities Akinobu Mita
2006-08-23 11:32 ` [patch 1/5] fail-injection library Akinobu Mita
2006-08-23 12:09   ` Andi Kleen
2006-08-23 11:32 ` [patch 2/5] fail-injection capability for kmalloc Akinobu Mita
2006-08-23 11:32 ` [patch 3/5] fail-injection capability for alloc_pages() Akinobu Mita
2006-08-23 11:32 ` [patch 4/5] fail-injection capability for disk IO Akinobu Mita
2006-08-23 12:03   ` Jens Axboe
2006-08-23 17:27     ` Andrew Morton
2006-08-23 18:01       ` Jens Axboe
2006-08-23 18:16         ` Ric Wheeler
2006-08-23 18:26           ` Jens Axboe
2006-08-23 18:22       ` Hans Reiser
2006-08-23 12:07   ` Andi Kleen
2006-08-23 12:10     ` Jens Axboe
2006-08-23 19:34       ` Mario 'BitKoenig' Holbe
2006-08-23 19:42         ` Ric Wheeler
2006-08-23 11:32 ` [patch 5/5] debugfs entries for configuration Akinobu Mita
2006-08-23 12:06 ` [patch 0/5] RFC: fault-injection capabilities Andi Kleen
2006-08-23 14:18 ` Alexey Dobriyan
2006-08-24 18:41 ` Valdis.Kletnieks

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).