All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] dm-bufio
@ 2011-10-14 19:14 Mikulas Patocka
  2011-10-17 10:08 ` Joe Thornber
                   ` (8 more replies)
  0 siblings, 9 replies; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-14 19:14 UTC (permalink / raw)
  To: Alasdair G. Kergon; +Cc: dm-devel, Edward Thornber, Mike Snitzer

Hi

This is a patch for dm-bufio.

Changes:
* fixed a bug in i/o submission, introduced by someone who changed the 
code
* simplified some constructs
* more likely/unlikely hints
* dm-bufio.h moved from drivers/md to include/linux
* put cond_resched() into loops (it was there originally and then someone 
deleted it)

Mikulas

---
 drivers/md/Kconfig       |    2 
 drivers/md/dm-bufio.c    |   95 ++++++++++++++++++++++++----------------
 drivers/md/dm-bufio.h    |  110 -----------------------------------------------
 include/linux/dm-bufio.h |  110 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 169 insertions(+), 148 deletions(-)

Index: linux-3.1-rc3-fast/drivers/md/Kconfig
===================================================================
--- linux-3.1-rc3-fast.orig/drivers/md/Kconfig	2011-10-14 20:56:45.000000000 +0200
+++ linux-3.1-rc3-fast/drivers/md/Kconfig	2011-10-14 20:56:46.000000000 +0200
@@ -209,7 +209,7 @@ config DM_DEBUG
 	  If unsure, say N.
 
 config DM_BUFIO
-       tristate
+       tristate "Buffered IO"
        depends on BLK_DEV_DM && EXPERIMENTAL
        ---help---
 	 This interface allows you to do buffered I/O on a device and acts
Index: linux-3.1-rc3-fast/drivers/md/dm-bufio.c
===================================================================
--- linux-3.1-rc3-fast.orig/drivers/md/dm-bufio.c	2011-10-14 20:56:45.000000000 +0200
+++ linux-3.1-rc3-fast/drivers/md/dm-bufio.c	2011-10-14 20:57:10.000000000 +0200
@@ -167,6 +167,12 @@ static void dm_bufio_unlock(struct dm_bu
 	mutex_unlock(&c->lock);
 }
 
+#ifdef CONFIG_PREEMPT
+#define dm_bufio_cond_resched()		do { } while (0)
+#else
+#define dm_bufio_cond_resched()		cond_resched()
+#endif
+
 /*----------------------------------------------------------------*/
 
 /* Default cache size --- available memory divided by the ratio */
@@ -470,17 +476,18 @@ static void use_inline_bio(struct dm_buf
 	ptr = b->data;
 	len = b->c->block_size;
 
-	if (len >= PAGE_SIZE)
+	if (likely(len >= PAGE_SIZE))
 		BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1));
 	else
 		BUG_ON((unsigned long)ptr & (len - 1));
 
 	do {
-		if (!bio_add_page(&b->bio, virt_to_page(ptr),
-				  len < PAGE_SIZE ? len : PAGE_SIZE,
-				  virt_to_phys(ptr) & (PAGE_SIZE - 1))) {
+		if (unlikely(!bio_add_page(&b->bio, virt_to_page(ptr),
+					len < PAGE_SIZE ? len : PAGE_SIZE,
+					virt_to_phys(ptr) & (PAGE_SIZE - 1)))) {
 			BUG_ON(b->c->block_size <= PAGE_SIZE);
 			use_dmio(b, rw, block, end_io);
+			return;
 		}
 
 		len -= PAGE_SIZE;
@@ -493,8 +500,10 @@ static void use_inline_bio(struct dm_buf
 static void submit_io(struct dm_buffer *b, int rw, sector_t block,
 		      bio_end_io_t *end_io)
 {
-	if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
-	    b->data_mode != DATA_MODE_VMALLOC)
+	if (rw == WRITE && b->c->write_callback)
+		b->c->write_callback(b);
+	if (likely(b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE) &&
+	    likely(b->data_mode != DATA_MODE_VMALLOC))
 		use_inline_bio(b, rw, block, end_io);
 	else
 		use_dmio(b, rw, block, end_io);
@@ -514,7 +523,7 @@ static void write_endio(struct bio *bio,
 {
 	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
 	b->write_error = error;
-	if (error) {
+	if (unlikely(error)) {
 		struct dm_bufio_client *c = b->c;
 		(void)cmpxchg(&c->async_write_error, 0, error);
 	}
@@ -550,8 +559,6 @@ static void __write_dirty_buffer(struct 
 	clear_bit(B_DIRTY, &b->state);
 	wait_on_bit_lock(&b->state, B_WRITING,
 			 do_io_schedule, TASK_UNINTERRUPTIBLE);
-	if (b->c->write_callback)
-		b->c->write_callback(b);
 	submit_io(b, WRITE, b->block, write_endio);
 }
 
@@ -572,7 +579,7 @@ static void __make_buffer_clean(struct d
 
 /*
  * Find some buffer that is not held by anybody, clean it, unlink it and
- * return it.  If "wait" is zero, try less hard and don't block.
+ * return it.
  */
 static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
 {
@@ -585,6 +592,7 @@ static struct dm_buffer *__get_unclaimed
 			__unlink_buffer(b);
 			return b;
 		}
+		dm_bufio_cond_resched();
 	}
 
 	list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
@@ -594,6 +602,7 @@ static struct dm_buffer *__get_unclaimed
 			__unlink_buffer(b);
 			return b;
 		}
+		dm_bufio_cond_resched();
 	}
 	return NULL;
 }
@@ -636,18 +645,21 @@ static struct dm_buffer *__alloc_buffer_
 		 * This is useful for debugging. When we set cache size to 1,
 		 * no new buffers are allocated at all.
 		 */
-		if (dm_bufio_cache_size_latch != 1) {
+		if (likely(dm_bufio_cache_size_latch != 1)) {
 			/*
-			 * dm-bufio is resistant to allocation failures (it just keeps
-			 * one buffer reserved in cases all the allocations fail).
+			 * dm-bufio is resistant to allocation failures (it
+			 * just keeps one buffer reserved in cases all the
+			 * allocations fail).
 			 * So set flags to not try too hard:
 			 *	GFP_NOIO: don't recurse into the I/O layer
-			 *	__GFP_NORETRY: don't retry and rather return failure
+			 *	__GFP_NORETRY: don't retry and rather return
+			 *			failure
 			 *	__GFP_NOMEMALLOC: don't use emergency reserves
-			 *	__GFP_NOWARN: don't print a warning in case of failure
+			 *	__GFP_NOWARN: don't print a warning in case of
+			 *			failure
 			 */
 			b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
-			if (b)
+			if (likely(b != NULL))
 				return b;
 		}
 
@@ -670,7 +682,7 @@ static struct dm_buffer *__alloc_buffer_
 static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c)
 {
 	struct dm_buffer *b = __alloc_buffer_wait_no_callback(c);
-	if (b && c->alloc_callback)
+	if (c->alloc_callback)
 		c->alloc_callback(b);
 	return b;
 }
@@ -682,7 +694,7 @@ static void __free_buffer_wake(struct dm
 {
 	struct dm_bufio_client *c = b->c;
 
-	if (c->need_reserved_buffers) {
+	if (unlikely(c->need_reserved_buffers != 0)) {
 		list_add(&b->lru_list, &c->reserved_buffers);
 		c->need_reserved_buffers--;
 	} else
@@ -704,6 +716,7 @@ static void __write_dirty_buffers_async(
 		if (no_wait && test_bit(B_WRITING, &b->state))
 			return;
 		__write_dirty_buffer(b);
+		dm_bufio_cond_resched();
 	}
 }
 
@@ -716,7 +729,7 @@ static void __get_memory_limit(struct dm
 {
 	unsigned long buffers;
 
-	if (dm_bufio_cache_size != dm_bufio_cache_size_latch) {
+	if (unlikely(dm_bufio_cache_size != dm_bufio_cache_size_latch)) {
 		mutex_lock(&dm_bufio_clients_lock);
 		__cache_size_refresh();
 		mutex_unlock(&dm_bufio_clients_lock);
@@ -724,7 +737,7 @@ static void __get_memory_limit(struct dm
 
 	buffers = dm_bufio_cache_size_per_client >>
 		  (c->sectors_per_block_bits + SECTOR_SHIFT);
-	if (buffers < DM_BUFIO_MIN_BUFFERS)
+	if (unlikely(buffers < DM_BUFIO_MIN_BUFFERS))
 		buffers = DM_BUFIO_MIN_BUFFERS;
 	*limit_buffers = buffers;
 	*threshold_buffers = buffers * DM_BUFIO_WRITEBACK_PERCENT / 100;
@@ -747,6 +760,7 @@ static void __check_watermark(struct dm_
 		if (!b)
 			return;
 		__free_buffer_wake(b);
+		dm_bufio_cond_resched();
 	}
 	if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
 		__write_dirty_buffers_async(c, 1);
@@ -758,8 +772,9 @@ static struct dm_buffer *__find(struct d
 	struct dm_buffer *b;
 	struct hlist_node *hn;
 	hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], hash_list) {
-		if (b->block == block)
+		if (likely(b->block == block))
 			return b;
+		dm_bufio_cond_resched();
 	}
 
 	return NULL;
@@ -775,12 +790,13 @@ enum new_flag {
 	NF_GET = 2
 };
 
-static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
-				     struct dm_buffer **bp, int *need_submit)
+static void read_endio(struct bio *bio, int error);
+
+static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
+				     enum new_flag nf, struct dm_buffer **bp)
 {
 	struct dm_buffer *b, *new_b = NULL;
 
-	*need_submit = 0;
 	b = __find(c, block);
 	if (b) {
 		b->hold_count++;
@@ -821,7 +837,9 @@ static struct dm_buffer *__bufio_new(str
 	}
 
 	b->state = 1 << B_READING;
-	*need_submit = 1;
+
+	submit_io(b, READ, b->block, read_endio);
+
 	return b;
 }
 
@@ -849,22 +867,18 @@ static void read_endio(struct bio *bio, 
 static void *new_read(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
 		      struct dm_buffer **bp)
 {
-	int need_submit;
 	struct dm_buffer *b;
 
 	dm_bufio_lock(c);
-	b = __bufio_new(c, block, nf, bp, &need_submit);
+	b = __bufio_new(c, block, nf, bp);
 	dm_bufio_unlock(c);
 
-	if (!b || IS_ERR(b))
+	if (unlikely(!b) || unlikely(IS_ERR(b)))
 		return b;
 	else {
-		if (need_submit)
-			submit_io(b, READ, b->block, read_endio);
-
 		wait_on_bit(&b->state, B_READING,
 			    do_io_schedule, TASK_UNINTERRUPTIBLE);
-		if (b->read_error != 0) {
+		if (unlikely(b->read_error != 0)) {
 			int error = b->read_error;
 			dm_bufio_release(b);
 			return ERR_PTR(error);
@@ -904,7 +918,7 @@ void dm_bufio_release(struct dm_buffer *
 	BUG_ON(test_bit(B_READING, &b->state));
 	BUG_ON(!b->hold_count);
 	b->hold_count--;
-	if (!b->hold_count) {
+	if (likely(!b->hold_count)) {
 		wake_up(&c->free_buffer_wait);
 
 		/*
@@ -912,7 +926,7 @@ void dm_bufio_release(struct dm_buffer *
 		 * to be written, free the buffer. There is no point in caching
 		 * invalid buffer.
 		 */
-		if ((b->read_error || b->write_error) &&
+		if (unlikely((b->read_error | b->write_error) != 0) &&
 		    !test_bit(B_WRITING, &b->state) &&
 		    !test_bit(B_DIRTY, &b->state)) {
 			__unlink_buffer(b);
@@ -999,15 +1013,19 @@ again:
 		 * someone is doing some writes simultaneously with us --- in
 		 * this case, stop dropping the lock.
 		 */
+		dm_bufio_cond_resched();
 		if (dropped_lock)
 			goto again;
 	}
 	wake_up(&c->free_buffer_wait);
 	dm_bufio_unlock(c);
 
-	a = xchg(&c->async_write_error, 0);
+	if (likely(!c->async_write_error))
+		a = 0;
+	else
+		a = xchg(&c->async_write_error, 0);
 	f = dm_bufio_issue_flush(c);
-	if (a)
+	if (unlikely(a != 0))
 		return a;
 	return f;
 }
@@ -1071,7 +1089,7 @@ retry:
 	BUG_ON(!b->hold_count);
 	BUG_ON(test_bit(B_READING, &b->state));
 	__write_dirty_buffer(b);
-	if (b->hold_count == 1) {
+	if (likely(b->hold_count == 1)) {
 		wait_on_bit(&b->state, B_WRITING,
 			    do_io_schedule, TASK_UNINTERRUPTIBLE);
 		set_bit(B_DIRTY, &b->state);
@@ -1193,6 +1211,7 @@ static void __scan(struct dm_bufio_clien
 				if (!--nr_to_scan)
 					return;
 			}
+			dm_bufio_cond_resched();
 		}
 	}
 }
@@ -1406,9 +1425,11 @@ static void cleanup_old_buffers(void)
 				       struct dm_buffer, lru_list);
 			if (__cleanup_old_buffer(b, 0, max_age))
 				break;
+			dm_bufio_cond_resched();
 		}
 
 		dm_bufio_unlock(c);
+		dm_bufio_cond_resched();
 	}
 	mutex_unlock(&dm_bufio_clients_lock);
 }
Index: linux-3.1-rc3-fast/drivers/md/dm-bufio.h
===================================================================
--- linux-3.1-rc3-fast.orig/drivers/md/dm-bufio.h	2011-10-14 20:56:45.000000000 +0200
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,110 +0,0 @@
-/*
- * Copyright (C) 2009-2011 2011 Red Hat, Inc.
- *
- * This file is released under the GPL.
- */
-
-#ifndef DM_BUFIO_H
-#define DM_BUFIO_H
-
-#include <linux/blkdev.h>
-#include <linux/types.h>
-
-/*----------------------------------------------------------------*/
-
-struct dm_bufio_client;
-struct dm_buffer;
-
-/*
- * Create a buffered IO cache on a given device
- */
-struct dm_bufio_client *
-dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
-		       unsigned reserved_buffers, unsigned aux_size,
-		       void (*alloc_callback)(struct dm_buffer *),
-		       void (*write_callback)(struct dm_buffer *));
-
-/*
- * Release a buffered IO cache.
- */
-void dm_bufio_client_destroy(struct dm_bufio_client *c);
-
-/*
- * WARNING: to avoid deadlocks, these conditions are observed:
- *
- * - At most one thread can hold at most "reserved_buffers" simultaneously.
- * - Each other threads can hold at most one buffer.
- * - Threads which call only dm_bufio_get can hold unlimited number of
- *   buffers.
- */
-
-/*
- * Read a given block from disk. Returns pointer to data.  Returns a
- * pointer to dm_buffer that can be used to release the buffer or to make
- * it dirty.
- */
-void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
-		    struct dm_buffer **bp);
-
-/*
- * Like dm_bufio_read, but return buffer from cache, don't read
- * it. If the buffer is not in the cache, return NULL.
- */
-void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
-		   struct dm_buffer **bp);
-
-/*
- * Like dm_bufio_read, but don't read anything from the disk.  It is
- * expected that the caller initializes the buffer and marks it dirty.
- */
-void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
-		   struct dm_buffer **bp);
-
-/*
- * Release a reference obtained with dm_bufio_{read,get,new}. The data
- * pointer and dm_buffer pointer is no longer valid after this call.
- */
-void dm_bufio_release(struct dm_buffer *b);
-
-/*
- * Mark a buffer dirty. It should be called after the buffer is modified.
- *
- * In case of memory pressure, the buffer may be written after
- * dm_bufio_mark_buffer_dirty, but before dm_bufio_write_dirty_buffers.  So
- * dm_bufio_write_dirty_buffers guarantees that the buffer is on-disk but
- * the actual writing may occur earlier.
- */
-void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
-
-/*
- * Initiate writing of dirty buffers, without waiting for completion.
- */
-void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
-
-/*
- * Write all dirty buffers. Guarantees that all dirty buffers created prior
- * to this call are on disk when this call exits.
- */
-int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c);
-
-/*
- * Send an empty write barrier to the device to flush hardware disk cache.
- */
-int dm_bufio_issue_flush(struct dm_bufio_client *c);
-
-/*
- * Like dm_bufio_release but also move the buffer to the new
- * block. dm_bufio_write_dirty_buffers is needed to commit the new block.
- */
-void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
-
-unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
-sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
-sector_t dm_bufio_get_block_number(struct dm_buffer *b);
-void *dm_bufio_get_block_data(struct dm_buffer *b);
-void *dm_bufio_get_aux_data(struct dm_buffer *b);
-struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b);
-
-/*----------------------------------------------------------------*/
-
-#endif
Index: linux-3.1-rc3-fast/include/linux/dm-bufio.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-3.1-rc3-fast/include/linux/dm-bufio.h	2011-10-14 20:56:46.000000000 +0200
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2009-2011 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_BUFIO_H
+#define DM_BUFIO_H
+
+#include <linux/blkdev.h>
+#include <linux/types.h>
+
+/*----------------------------------------------------------------*/
+
+struct dm_bufio_client;
+struct dm_buffer;
+
+/*
+ * Create a buffered IO cache on a given device
+ */
+struct dm_bufio_client *
+dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
+		       unsigned reserved_buffers, unsigned aux_size,
+		       void (*alloc_callback)(struct dm_buffer *),
+		       void (*write_callback)(struct dm_buffer *));
+
+/*
+ * Release a buffered IO cache.
+ */
+void dm_bufio_client_destroy(struct dm_bufio_client *c);
+
+/*
+ * WARNING: to avoid deadlocks, these conditions are observed:
+ *
+ * - At most one thread can hold at most "reserved_buffers" simultaneously.
+ * - Each other threads can hold at most one buffer.
+ * - Threads which call only dm_bufio_get can hold unlimited number of
+ *   buffers.
+ */
+
+/*
+ * Read a given block from disk. Returns pointer to data.  Returns a
+ * pointer to dm_buffer that can be used to release the buffer or to make
+ * it dirty.
+ */
+void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
+		    struct dm_buffer **bp);
+
+/*
+ * Like dm_bufio_read, but return buffer from cache, don't read
+ * it. If the buffer is not in the cache, return NULL.
+ */
+void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
+		   struct dm_buffer **bp);
+
+/*
+ * Like dm_bufio_read, but don't read anything from the disk.  It is
+ * expected that the caller initializes the buffer and marks it dirty.
+ */
+void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
+		   struct dm_buffer **bp);
+
+/*
+ * Release a reference obtained with dm_bufio_{read,get,new}. The data
+ * pointer and dm_buffer pointer is no longer valid after this call.
+ */
+void dm_bufio_release(struct dm_buffer *b);
+
+/*
+ * Mark a buffer dirty. It should be called after the buffer is modified.
+ *
+ * In case of memory pressure, the buffer may be written after
+ * dm_bufio_mark_buffer_dirty, but before dm_bufio_write_dirty_buffers.  So
+ * dm_bufio_write_dirty_buffers guarantees that the buffer is on-disk but
+ * the actual writing may occur earlier.
+ */
+void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
+
+/*
+ * Initiate writing of dirty buffers, without waiting for completion.
+ */
+void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
+
+/*
+ * Write all dirty buffers. Guarantees that all dirty buffers created prior
+ * to this call are on disk when this call exits.
+ */
+int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c);
+
+/*
+ * Send an empty write barrier to the device to flush hardware disk cache.
+ */
+int dm_bufio_issue_flush(struct dm_bufio_client *c);
+
+/*
+ * Like dm_bufio_release but also move the buffer to the new
+ * block. dm_bufio_write_dirty_buffers is needed to commit the new block.
+ */
+void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
+
+unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
+sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
+sector_t dm_bufio_get_block_number(struct dm_buffer *b);
+void *dm_bufio_get_block_data(struct dm_buffer *b);
+void *dm_bufio_get_aux_data(struct dm_buffer *b);
+struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b);
+
+/*----------------------------------------------------------------*/
+
+#endif

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
@ 2011-10-17 10:08 ` Joe Thornber
  2011-10-17 12:41   ` dm-bufio Mike Snitzer
  2011-10-17 14:04   ` [PATCH] dm-bufio Mikulas Patocka
  2011-10-17 10:29 ` Joe Thornber
                   ` (7 subsequent siblings)
  8 siblings, 2 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 10:08 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> Hi
> 
> This is a patch for dm-bufio.
> 
> Changes:
> * fixed a bug in i/o submission, introduced by someone who changed the 
> code

Could you point me at the specific part of this patch that does this
please?

> * simplified some constructs
> * more likely/unlikely hints

They clutter the code, and have been used without discrimination.  What
is the point of using it on a slow path?

> * dm-bufio.h moved from drivers/md to include/linux

Who outside dm do you expect to use this?

> * put cond_resched() to loops (it was there originally and then someone 
> deleted it)

If you're going to use cond_resched() at least do so a little more
intelligently than putting it in _every_ loop.  For instance you call it on
every iteration of a sweep through the hash table.  The call to
cond_resched will take more time than the loop body.  At least make a
change so it's only done every n'th iteration.


Can I also point out that I asked you to make a lot of these changes
over a year ago.  You've only got yourself to blame if 'someone' does
it for you.

- Someone




> 
> Mikulas
> 
> ---
>  drivers/md/Kconfig       |    2 
>  drivers/md/dm-bufio.c    |   95 ++++++++++++++++++++++++----------------
>  drivers/md/dm-bufio.h    |  110 -----------------------------------------------
>  include/linux/dm-bufio.h |  110 +++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 169 insertions(+), 148 deletions(-)
> 
> Index: linux-3.1-rc3-fast/drivers/md/Kconfig
> ===================================================================
> --- linux-3.1-rc3-fast.orig/drivers/md/Kconfig	2011-10-14 20:56:45.000000000 +0200
> +++ linux-3.1-rc3-fast/drivers/md/Kconfig	2011-10-14 20:56:46.000000000 +0200
> @@ -209,7 +209,7 @@ config DM_DEBUG
>  	  If unsure, say N.
>  
>  config DM_BUFIO
> -       tristate
> +       tristate "Buffered IO"
>         depends on BLK_DEV_DM && EXPERIMENTAL
>         ---help---
>  	 This interface allows you to do buffered I/O on a device and acts
> Index: linux-3.1-rc3-fast/drivers/md/dm-bufio.c
> ===================================================================
> --- linux-3.1-rc3-fast.orig/drivers/md/dm-bufio.c	2011-10-14 20:56:45.000000000 +0200
> +++ linux-3.1-rc3-fast/drivers/md/dm-bufio.c	2011-10-14 20:57:10.000000000 +0200
> @@ -167,6 +167,12 @@ static void dm_bufio_unlock(struct dm_bu
>  	mutex_unlock(&c->lock);
>  }
>  
> +#ifdef CONFIG_PREEMPT
> +#define dm_bufio_cond_resched()		do { } while (0)
> +#else
> +#define dm_bufio_cond_resched()		cond_resched()
> +#endif
> +
>  /*----------------------------------------------------------------*/
>  
>  /* Default cache size --- available memory divided by the ratio */
> @@ -470,17 +476,18 @@ static void use_inline_bio(struct dm_buf
>  	ptr = b->data;
>  	len = b->c->block_size;
>  
> -	if (len >= PAGE_SIZE)
> +	if (likely(len >= PAGE_SIZE))
>  		BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1));
>  	else
>  		BUG_ON((unsigned long)ptr & (len - 1));
>  
>  	do {
> -		if (!bio_add_page(&b->bio, virt_to_page(ptr),
> -				  len < PAGE_SIZE ? len : PAGE_SIZE,
> -				  virt_to_phys(ptr) & (PAGE_SIZE - 1))) {
> +		if (unlikely(!bio_add_page(&b->bio, virt_to_page(ptr),
> +					len < PAGE_SIZE ? len : PAGE_SIZE,
> +					virt_to_phys(ptr) & (PAGE_SIZE - 1)))) {
>  			BUG_ON(b->c->block_size <= PAGE_SIZE);
>  			use_dmio(b, rw, block, end_io);
> +			return;
>  		}
>  
>  		len -= PAGE_SIZE;
> @@ -493,8 +500,10 @@ static void use_inline_bio(struct dm_buf
>  static void submit_io(struct dm_buffer *b, int rw, sector_t block,
>  		      bio_end_io_t *end_io)
>  {
> -	if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
> -	    b->data_mode != DATA_MODE_VMALLOC)
> +	if (rw == WRITE && b->c->write_callback)
> +		b->c->write_callback(b);
> +	if (likely(b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE) &&
> +	    likely(b->data_mode != DATA_MODE_VMALLOC))
>  		use_inline_bio(b, rw, block, end_io);
>  	else
>  		use_dmio(b, rw, block, end_io);
> @@ -514,7 +523,7 @@ static void write_endio(struct bio *bio,
>  {
>  	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
>  	b->write_error = error;
> -	if (error) {
> +	if (unlikely(error)) {
>  		struct dm_bufio_client *c = b->c;
>  		(void)cmpxchg(&c->async_write_error, 0, error);
>  	}
> @@ -550,8 +559,6 @@ static void __write_dirty_buffer(struct 
>  	clear_bit(B_DIRTY, &b->state);
>  	wait_on_bit_lock(&b->state, B_WRITING,
>  			 do_io_schedule, TASK_UNINTERRUPTIBLE);
> -	if (b->c->write_callback)
> -		b->c->write_callback(b);
>  	submit_io(b, WRITE, b->block, write_endio);
>  }
>  
> @@ -572,7 +579,7 @@ static void __make_buffer_clean(struct d
>  
>  /*
>   * Find some buffer that is not held by anybody, clean it, unlink it and
> - * return it.  If "wait" is zero, try less hard and don't block.
> + * return it.
>   */
>  static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
>  {
> @@ -585,6 +592,7 @@ static struct dm_buffer *__get_unclaimed
>  			__unlink_buffer(b);
>  			return b;
>  		}
> +		dm_bufio_cond_resched();
>  	}
>  
>  	list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
> @@ -594,6 +602,7 @@ static struct dm_buffer *__get_unclaimed
>  			__unlink_buffer(b);
>  			return b;
>  		}
> +		dm_bufio_cond_resched();
>  	}
>  	return NULL;
>  }
> @@ -636,18 +645,21 @@ static struct dm_buffer *__alloc_buffer_
>  		 * This is useful for debugging. When we set cache size to 1,
>  		 * no new buffers are allocated at all.
>  		 */
> -		if (dm_bufio_cache_size_latch != 1) {
> +		if (likely(dm_bufio_cache_size_latch != 1)) {
>  			/*
> -			 * dm-bufio is resistant to allocation failures (it just keeps
> -			 * one buffer reserved in cases all the allocations fail).
> +			 * dm-bufio is resistant to allocation failures (it
> +			 * just keeps one buffer reserved in cases all the
> +			 * allocations fail).
>  			 * So set flags to not try too hard:
>  			 *	GFP_NOIO: don't recurse into the I/O layer
> -			 *	__GFP_NORETRY: don't retry and rather return failure
> +			 *	__GFP_NORETRY: don't retry and rather return
> +			 *			failure
>  			 *	__GFP_NOMEMALLOC: don't use emergency reserves
> -			 *	__GFP_NOWARN: don't print a warning in case of failure
> +			 *	__GFP_NOWARN: don't print a warning in case of
> +			 *			failure
>  			 */
>  			b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
> -			if (b)
> +			if (likely(b != NULL))
>  				return b;
>  		}
>  
> @@ -670,7 +682,7 @@ static struct dm_buffer *__alloc_buffer_
>  static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c)
>  {
>  	struct dm_buffer *b = __alloc_buffer_wait_no_callback(c);
> -	if (b && c->alloc_callback)
> +	if (c->alloc_callback)
>  		c->alloc_callback(b);
>  	return b;
>  }
> @@ -682,7 +694,7 @@ static void __free_buffer_wake(struct dm
>  {
>  	struct dm_bufio_client *c = b->c;
>  
> -	if (c->need_reserved_buffers) {
> +	if (unlikely(c->need_reserved_buffers != 0)) {
>  		list_add(&b->lru_list, &c->reserved_buffers);
>  		c->need_reserved_buffers--;
>  	} else
> @@ -704,6 +716,7 @@ static void __write_dirty_buffers_async(
>  		if (no_wait && test_bit(B_WRITING, &b->state))
>  			return;
>  		__write_dirty_buffer(b);
> +		dm_bufio_cond_resched();
>  	}
>  }
>  
> @@ -716,7 +729,7 @@ static void __get_memory_limit(struct dm
>  {
>  	unsigned long buffers;
>  
> -	if (dm_bufio_cache_size != dm_bufio_cache_size_latch) {
> +	if (unlikely(dm_bufio_cache_size != dm_bufio_cache_size_latch)) {
>  		mutex_lock(&dm_bufio_clients_lock);
>  		__cache_size_refresh();
>  		mutex_unlock(&dm_bufio_clients_lock);
> @@ -724,7 +737,7 @@ static void __get_memory_limit(struct dm
>  
>  	buffers = dm_bufio_cache_size_per_client >>
>  		  (c->sectors_per_block_bits + SECTOR_SHIFT);
> -	if (buffers < DM_BUFIO_MIN_BUFFERS)
> +	if (unlikely(buffers < DM_BUFIO_MIN_BUFFERS))
>  		buffers = DM_BUFIO_MIN_BUFFERS;
>  	*limit_buffers = buffers;
>  	*threshold_buffers = buffers * DM_BUFIO_WRITEBACK_PERCENT / 100;
> @@ -747,6 +760,7 @@ static void __check_watermark(struct dm_
>  		if (!b)
>  			return;
>  		__free_buffer_wake(b);
> +		dm_bufio_cond_resched();
>  	}
>  	if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
>  		__write_dirty_buffers_async(c, 1);
> @@ -758,8 +772,9 @@ static struct dm_buffer *__find(struct d
>  	struct dm_buffer *b;
>  	struct hlist_node *hn;
>  	hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], hash_list) {
> -		if (b->block == block)
> +		if (likely(b->block == block))
>  			return b;
> +		dm_bufio_cond_resched();
>  	}
>  
>  	return NULL;
> @@ -775,12 +790,13 @@ enum new_flag {
>  	NF_GET = 2
>  };
>  
> -static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
> -				     struct dm_buffer **bp, int *need_submit)
> +static void read_endio(struct bio *bio, int error);
> +
> +static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
> +				     enum new_flag nf, struct dm_buffer **bp)
>  {
>  	struct dm_buffer *b, *new_b = NULL;
>  
> -	*need_submit = 0;
>  	b = __find(c, block);
>  	if (b) {
>  		b->hold_count++;
> @@ -821,7 +837,9 @@ static struct dm_buffer *__bufio_new(str
>  	}
>  
>  	b->state = 1 << B_READING;
> -	*need_submit = 1;
> +
> +	submit_io(b, READ, b->block, read_endio);
> +
>  	return b;
>  }
>  
> @@ -849,22 +867,18 @@ static void read_endio(struct bio *bio, 
>  static void *new_read(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
>  		      struct dm_buffer **bp)
>  {
> -	int need_submit;
>  	struct dm_buffer *b;
>  
>  	dm_bufio_lock(c);
> -	b = __bufio_new(c, block, nf, bp, &need_submit);
> +	b = __bufio_new(c, block, nf, bp);
>  	dm_bufio_unlock(c);
>  
> -	if (!b || IS_ERR(b))
> +	if (unlikely(!b) || unlikely(IS_ERR(b)))
>  		return b;
>  	else {
> -		if (need_submit)
> -			submit_io(b, READ, b->block, read_endio);
> -
>  		wait_on_bit(&b->state, B_READING,
>  			    do_io_schedule, TASK_UNINTERRUPTIBLE);
> -		if (b->read_error != 0) {
> +		if (unlikely(b->read_error != 0)) {
>  			int error = b->read_error;
>  			dm_bufio_release(b);
>  			return ERR_PTR(error);
> @@ -904,7 +918,7 @@ void dm_bufio_release(struct dm_buffer *
>  	BUG_ON(test_bit(B_READING, &b->state));
>  	BUG_ON(!b->hold_count);
>  	b->hold_count--;
> -	if (!b->hold_count) {
> +	if (likely(!b->hold_count)) {
>  		wake_up(&c->free_buffer_wait);
>  
>  		/*
> @@ -912,7 +926,7 @@ void dm_bufio_release(struct dm_buffer *
>  		 * to be written, free the buffer. There is no point in caching
>  		 * invalid buffer.
>  		 */
> -		if ((b->read_error || b->write_error) &&
> +		if (unlikely((b->read_error | b->write_error) != 0) &&
>  		    !test_bit(B_WRITING, &b->state) &&
>  		    !test_bit(B_DIRTY, &b->state)) {
>  			__unlink_buffer(b);
> @@ -999,15 +1013,19 @@ again:
>  		 * someone is doing some writes simultaneously with us --- in
>  		 * this case, stop dropping the lock.
>  		 */
> +		dm_bufio_cond_resched();
>  		if (dropped_lock)
>  			goto again;
>  	}
>  	wake_up(&c->free_buffer_wait);
>  	dm_bufio_unlock(c);
>  
> -	a = xchg(&c->async_write_error, 0);
> +	if (likely(!c->async_write_error))
> +		a = 0;
> +	else
> +		a = xchg(&c->async_write_error, 0);
>  	f = dm_bufio_issue_flush(c);
> -	if (a)
> +	if (unlikely(a != 0))
>  		return a;
>  	return f;
>  }
> @@ -1071,7 +1089,7 @@ retry:
>  	BUG_ON(!b->hold_count);
>  	BUG_ON(test_bit(B_READING, &b->state));
>  	__write_dirty_buffer(b);
> -	if (b->hold_count == 1) {
> +	if (likely(b->hold_count == 1)) {
>  		wait_on_bit(&b->state, B_WRITING,
>  			    do_io_schedule, TASK_UNINTERRUPTIBLE);
>  		set_bit(B_DIRTY, &b->state);
> @@ -1193,6 +1211,7 @@ static void __scan(struct dm_bufio_clien
>  				if (!--nr_to_scan)
>  					return;
>  			}
> +			dm_bufio_cond_resched();
>  		}
>  	}
>  }
> @@ -1406,9 +1425,11 @@ static void cleanup_old_buffers(void)
>  				       struct dm_buffer, lru_list);
>  			if (__cleanup_old_buffer(b, 0, max_age))
>  				break;
> +			dm_bufio_cond_resched();
>  		}
>  
>  		dm_bufio_unlock(c);
> +		dm_bufio_cond_resched();
>  	}
>  	mutex_unlock(&dm_bufio_clients_lock);
>  }
> Index: linux-3.1-rc3-fast/drivers/md/dm-bufio.h
> ===================================================================
> --- linux-3.1-rc3-fast.orig/drivers/md/dm-bufio.h	2011-10-14 20:56:45.000000000 +0200
> +++ /dev/null	1970-01-01 00:00:00.000000000 +0000
> @@ -1,110 +0,0 @@
> -/*
> - * Copyright (C) 2009-2011 2011 Red Hat, Inc.
> - *
> - * This file is released under the GPL.
> - */
> -
> -#ifndef DM_BUFIO_H
> -#define DM_BUFIO_H
> -
> -#include <linux/blkdev.h>
> -#include <linux/types.h>
> -
> -/*----------------------------------------------------------------*/
> -
> -struct dm_bufio_client;
> -struct dm_buffer;
> -
> -/*
> - * Create a buffered IO cache on a given device
> - */
> -struct dm_bufio_client *
> -dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
> -		       unsigned reserved_buffers, unsigned aux_size,
> -		       void (*alloc_callback)(struct dm_buffer *),
> -		       void (*write_callback)(struct dm_buffer *));
> -
> -/*
> - * Release a buffered IO cache.
> - */
> -void dm_bufio_client_destroy(struct dm_bufio_client *c);
> -
> -/*
> - * WARNING: to avoid deadlocks, these conditions are observed:
> - *
> - * - At most one thread can hold at most "reserved_buffers" simultaneously.
> - * - Any other thread can hold at most one buffer.
> - * - Threads which call only dm_bufio_get can hold an unlimited number of
> - *   buffers.
> - */
> -
> -/*
> - * Read a given block from disk. Returns pointer to data.  Returns a
> - * pointer to dm_buffer that can be used to release the buffer or to make
> - * it dirty.
> - */
> -void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
> -		    struct dm_buffer **bp);
> -
> -/*
> - * Like dm_bufio_read, but return buffer from cache, don't read
> - * it. If the buffer is not in the cache, return NULL.
> - */
> -void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
> -		   struct dm_buffer **bp);
> -
> -/*
> - * Like dm_bufio_read, but don't read anything from the disk.  It is
> - * expected that the caller initializes the buffer and marks it dirty.
> - */
> -void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
> -		   struct dm_buffer **bp);
> -
> -/*
> - * Release a reference obtained with dm_bufio_{read,get,new}. The data
> - * pointer and dm_buffer pointer are no longer valid after this call.
> - */
> -void dm_bufio_release(struct dm_buffer *b);
> -
> -/*
> - * Mark a buffer dirty. It should be called after the buffer is modified.
> - *
> - * In case of memory pressure, the buffer may be written after
> - * dm_bufio_mark_buffer_dirty, but before dm_bufio_write_dirty_buffers.  So
> - * dm_bufio_write_dirty_buffers guarantees that the buffer is on-disk but
> - * the actual writing may occur earlier.
> - */
> -void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
> -
> -/*
> - * Initiate writing of dirty buffers, without waiting for completion.
> - */
> -void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
> -
> -/*
> - * Write all dirty buffers. Guarantees that all dirty buffers created prior
> - * to this call are on disk when this call exits.
> - */
> -int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c);
> -
> -/*
> - * Send an empty write barrier to the device to flush hardware disk cache.
> - */
> -int dm_bufio_issue_flush(struct dm_bufio_client *c);
> -
> -/*
> - * Like dm_bufio_release but also move the buffer to the new
> - * block. dm_bufio_write_dirty_buffers is needed to commit the new block.
> - */
> -void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
> -
> -unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
> -sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
> -sector_t dm_bufio_get_block_number(struct dm_buffer *b);
> -void *dm_bufio_get_block_data(struct dm_buffer *b);
> -void *dm_bufio_get_aux_data(struct dm_buffer *b);
> -struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b);
> -
> -/*----------------------------------------------------------------*/
> -
> -#endif
> Index: linux-3.1-rc3-fast/include/linux/dm-bufio.h
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-3.1-rc3-fast/include/linux/dm-bufio.h	2011-10-14 20:56:46.000000000 +0200
> @@ -0,0 +1,110 @@
> +/*
> + * Copyright (C) 2009-2011 Red Hat, Inc.
> + *
> + * This file is released under the GPL.
> + */
> +
> +#ifndef DM_BUFIO_H
> +#define DM_BUFIO_H
> +
> +#include <linux/blkdev.h>
> +#include <linux/types.h>
> +
> +/*----------------------------------------------------------------*/
> +
> +struct dm_bufio_client;
> +struct dm_buffer;
> +
> +/*
> + * Create a buffered IO cache on a given device
> + */
> +struct dm_bufio_client *
> +dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
> +		       unsigned reserved_buffers, unsigned aux_size,
> +		       void (*alloc_callback)(struct dm_buffer *),
> +		       void (*write_callback)(struct dm_buffer *));
> +
> +/*
> + * Release a buffered IO cache.
> + */
> +void dm_bufio_client_destroy(struct dm_bufio_client *c);
> +
> +/*
> + * WARNING: to avoid deadlocks, these conditions are observed:
> + *
> + * - At most one thread can hold at most "reserved_buffers" simultaneously.
> + * - Any other thread can hold at most one buffer.
> + * - Threads which call only dm_bufio_get can hold an unlimited number of
> + *   buffers.
> + */
> +
> +/*
> + * Read a given block from disk. Returns pointer to data.  Returns a
> + * pointer to dm_buffer that can be used to release the buffer or to make
> + * it dirty.
> + */
> +void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
> +		    struct dm_buffer **bp);
> +
> +/*
> + * Like dm_bufio_read, but return buffer from cache, don't read
> + * it. If the buffer is not in the cache, return NULL.
> + */
> +void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
> +		   struct dm_buffer **bp);
> +
> +/*
> + * Like dm_bufio_read, but don't read anything from the disk.  It is
> + * expected that the caller initializes the buffer and marks it dirty.
> + */
> +void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
> +		   struct dm_buffer **bp);
> +
> +/*
> + * Release a reference obtained with dm_bufio_{read,get,new}. The data
> + * pointer and dm_buffer pointer are no longer valid after this call.
> + */
> +void dm_bufio_release(struct dm_buffer *b);
> +
> +/*
> + * Mark a buffer dirty. It should be called after the buffer is modified.
> + *
> + * In case of memory pressure, the buffer may be written after
> + * dm_bufio_mark_buffer_dirty, but before dm_bufio_write_dirty_buffers.  So
> + * dm_bufio_write_dirty_buffers guarantees that the buffer is on-disk but
> + * the actual writing may occur earlier.
> + */
> +void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
> +
> +/*
> + * Initiate writing of dirty buffers, without waiting for completion.
> + */
> +void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
> +
> +/*
> + * Write all dirty buffers. Guarantees that all dirty buffers created prior
> + * to this call are on disk when this call exits.
> + */
> +int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c);
> +
> +/*
> + * Send an empty write barrier to the device to flush hardware disk cache.
> + */
> +int dm_bufio_issue_flush(struct dm_bufio_client *c);
> +
> +/*
> + * Like dm_bufio_release but also move the buffer to the new
> + * block. dm_bufio_write_dirty_buffers is needed to commit the new block.
> + */
> +void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
> +
> +unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
> +sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
> +sector_t dm_bufio_get_block_number(struct dm_buffer *b);
> +void *dm_bufio_get_block_data(struct dm_buffer *b);
> +void *dm_bufio_get_aux_data(struct dm_buffer *b);
> +struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b);
> +
> +/*----------------------------------------------------------------*/
> +
> +#endif

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
  2011-10-17 10:08 ` Joe Thornber
@ 2011-10-17 10:29 ` Joe Thornber
  2011-10-17 10:39 ` Joe Thornber
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 10:29 UTC (permalink / raw)
  To: device-mapper development; +Cc: Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> --- linux-3.1-rc3-fast.orig/drivers/md/Kconfig	2011-10-14 20:56:45.000000000 +0200
> +++ linux-3.1-rc3-fast/drivers/md/Kconfig	2011-10-14 20:56:46.000000000 +0200
> @@ -209,7 +209,7 @@ config DM_DEBUG
>  	  If unsure, say N.
>  
>  config DM_BUFIO
> -       tristate
> +       tristate "Buffered IO"
>         depends on BLK_DEV_DM && EXPERIMENTAL
>         ---help---
>  	 This interface allows you to do buffered I/O on a device and acts

This makes bufio appear in 'menuconfig' and friends.  Why do you want this user visible?

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
  2011-10-17 10:08 ` Joe Thornber
  2011-10-17 10:29 ` Joe Thornber
@ 2011-10-17 10:39 ` Joe Thornber
  2011-10-17 10:43 ` Joe Thornber
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 10:39 UTC (permalink / raw)
  To: device-mapper development; +Cc: Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
>  			use_dmio(b, rw, block, end_io);
> +			return;

ACK

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
                   ` (2 preceding siblings ...)
  2011-10-17 10:39 ` Joe Thornber
@ 2011-10-17 10:43 ` Joe Thornber
  2011-10-17 10:54 ` Joe Thornber
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 10:43 UTC (permalink / raw)
  To: device-mapper development; +Cc: Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> @@ -572,7 +579,7 @@ static void __make_buffer_clean(struct d
>  
>  /*
>   * Find some buffer that is not held by anybody, clean it, unlink it and
> - * return it.  If "wait" is zero, try less hard and don't block.
> + * return it.
>   */

ACK

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
                   ` (3 preceding siblings ...)
  2011-10-17 10:43 ` Joe Thornber
@ 2011-10-17 10:54 ` Joe Thornber
  2011-10-17 13:47   ` Mikulas Patocka
  2011-10-17 10:57 ` Joe Thornber
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 10:54 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> -static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
> -				     struct dm_buffer **bp, int *need_submit)
> +static void read_endio(struct bio *bio, int error);
> +
> +static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
> +				     enum new_flag nf, struct dm_buffer **bp)
>  {
>  	struct dm_buffer *b, *new_b = NULL;
>  
> -	*need_submit = 0;
>  	b = __find(c, block);
>  	if (b) {
>  		b->hold_count++;
> @@ -821,7 +837,9 @@ static struct dm_buffer *__bufio_new(str
>  	}
>  
>  	b->state = 1 << B_READING;
> -	*need_submit = 1;
> +
> +	submit_io(b, READ, b->block, read_endio);
> +
>  	return b;
>  }
>  
> @@ -849,22 +867,18 @@ static void read_endio(struct bio *bio, 
>  static void *new_read(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
>  		      struct dm_buffer **bp)
>  {
> -	int need_submit;
>  	struct dm_buffer *b;
>  
>  	dm_bufio_lock(c);
> -	b = __bufio_new(c, block, nf, bp, &need_submit);
> +	b = __bufio_new(c, block, nf, bp);
>  	dm_bufio_unlock(c);
>  
> 	if (!b || IS_ERR(b))
>  		return b;
>  	else {
> -		if (need_submit)
> -			submit_io(b, READ, b->block, read_endio);
> -
>  		wait_on_bit(&b->state, B_READING,
>  			    do_io_schedule, TASK_UNINTERRUPTIBLE);


This change means submit_io(), which can block, will be called with
the client lock held.  I don't see this as an improvement.  NACK.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
                   ` (4 preceding siblings ...)
  2011-10-17 10:54 ` Joe Thornber
@ 2011-10-17 10:57 ` Joe Thornber
  2011-10-17 11:11 ` Joe Thornber
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 10:57 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> @@ -670,7 +682,7 @@ static struct dm_buffer *__alloc_buffer_
>  static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c)
>  {
>  	struct dm_buffer *b = __alloc_buffer_wait_no_callback(c);
> -	if (b && c->alloc_callback)
> +	if (c->alloc_callback)
>  		c->alloc_callback(b);
>  	return b;
>  }

ACK.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
                   ` (5 preceding siblings ...)
  2011-10-17 10:57 ` Joe Thornber
@ 2011-10-17 11:11 ` Joe Thornber
  2011-10-17 14:05   ` Mikulas Patocka
  2011-10-17 11:14 ` Joe Thornber
  2011-10-17 11:29 ` Joe Thornber
  8 siblings, 1 reply; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 11:11 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
@@ -493,8 +500,10 @@ static void use_inline_bio(struct dm_buf
 static void submit_io(struct dm_buffer *b, int rw, sector_t block,
                      bio_end_io_t *end_io)
 {
-       if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
-           b->data_mode != DATA_MODE_VMALLOC)
+       if (rw == WRITE && b->c->write_callback)
+               b->c->write_callback(b);
        if (likely(b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE) &&
            likely(b->data_mode != DATA_MODE_VMALLOC))
                use_inline_bio(b, rw, block, end_io);
        else
                use_dmio(b, rw, block, end_io);
@@ -550,8 +559,6 @@ static void __write_dirty_buffer(struct
        clear_bit(B_DIRTY, &b->state);
        wait_on_bit_lock(&b->state, B_WRITING,
                         do_io_schedule, TASK_UNINTERRUPTIBLE);
-       if (b->c->write_callback)
-               b->c->write_callback(b);
        submit_io(b, WRITE, b->block, write_endio);
 }


This doesn't seem an improvement.  Except ... it changes the behaviour
of dm_bufio_release_move().  So was there a preexisting bug in
dm_bufio_release_move() that you're trying to fix with this patch?

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
                   ` (6 preceding siblings ...)
  2011-10-17 11:11 ` Joe Thornber
@ 2011-10-17 11:14 ` Joe Thornber
  2011-10-17 13:43   ` Mikulas Patocka
  2011-10-17 11:29 ` Joe Thornber
  8 siblings, 1 reply; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 11:14 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
@@ -999,15 +1013,19 @@ again:
                 * someone is doing some writes simultaneously with us --- in
                 * this case, stop dropping the lock.
                 */
                if (dropped_lock)
                        goto again;
        }
        wake_up(&c->free_buffer_wait);
        dm_bufio_unlock(c);

-       a = xchg(&c->async_write_error, 0);
+       if (likely(!c->async_write_error))
+               a = 0;
+       else
+               a = xchg(&c->async_write_error, 0);
        f = dm_bufio_issue_flush(c);


I didn't change this.  Is it also fixing a preexisting issue?

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
                   ` (7 preceding siblings ...)
  2011-10-17 11:14 ` Joe Thornber
@ 2011-10-17 11:29 ` Joe Thornber
  2011-10-17 16:24   ` Mikulas Patocka
  8 siblings, 1 reply; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 11:29 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> Hi
> 
> This is a patch for dm-bufio.

I've merged all I'm going to at this point and pushed to thin-dev.

If you put together a patch for the cond_resched stuff I'll take that;
providing you don't call it in a tight loop like here:

> @@ -758,8 +772,9 @@ static struct dm_buffer *__find(struct d
>  	struct dm_buffer *b;
>  	struct hlist_node *hn;
>  	hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], hash_list) {
> -		if (b->block == block)
> +		if (likely(b->block == block))
>  			return b;
> +		dm_bufio_cond_resched();
>  	}


I'm not really interested in the likely/unlikely annotations.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-17 10:08 ` Joe Thornber
@ 2011-10-17 12:41   ` Mike Snitzer
  2011-10-17 14:04   ` [PATCH] dm-bufio Mikulas Patocka
  1 sibling, 0 replies; 51+ messages in thread
From: Mike Snitzer @ 2011-10-17 12:41 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Mikulas Patocka, Alasdair G. Kergon

On Mon, Oct 17 2011 at  6:08am -0400,
Joe Thornber <thornber@redhat.com> wrote:

> On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> > Hi
> > 
> > This is a patch for dm-bufio.
> > 
> > Changes:
...
> > * dm-bufio.h moved from drivers/md to include/linux
> 
> Who outside dm do you expect to use this?

External DM target modules may have an interest (not great justification
but...).

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 11:14 ` Joe Thornber
@ 2011-10-17 13:43   ` Mikulas Patocka
  0 siblings, 0 replies; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-17 13:43 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon



On Mon, 17 Oct 2011, Joe Thornber wrote:

> On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> @@ -999,15 +1013,19 @@ again:
>                  * someone is doing some writes simultaneously with us --- in
>                  * this case, stop dropping the lock.
>                  */
>                 if (dropped_lock)
>                         goto again;
>         }
>         wake_up(&c->free_buffer_wait);
>         dm_bufio_unlock(c);
> 
> -       a = xchg(&c->async_write_error, 0);
> +       if (likely(!c->async_write_error))
> +               a = 0;
> +       else
> +               a = xchg(&c->async_write_error, 0);
>         f = dm_bufio_issue_flush(c);
> 
> 
> I didn't change this.  Is it also fixing a preexisting issue?

It doesn't fix anything. It just bypasses xchg for the most common case.

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 10:54 ` Joe Thornber
@ 2011-10-17 13:47   ` Mikulas Patocka
  2011-10-17 13:57     ` Joe Thornber
  0 siblings, 1 reply; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-17 13:47 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon



On Mon, 17 Oct 2011, Joe Thornber wrote:

> On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> > -static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
> > -				     struct dm_buffer **bp, int *need_submit)
> > +static void read_endio(struct bio *bio, int error);
> > +
> > +static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
> > +				     enum new_flag nf, struct dm_buffer **bp)
> >  {
> >  	struct dm_buffer *b, *new_b = NULL;
> >  
> > -	*need_submit = 0;
> >  	b = __find(c, block);
> >  	if (b) {
> >  		b->hold_count++;
> > @@ -821,7 +837,9 @@ static struct dm_buffer *__bufio_new(str
> >  	}
> >  
> >  	b->state = 1 << B_READING;
> > -	*need_submit = 1;
> > +
> > +	submit_io(b, READ, b->block, read_endio);
> > +
> >  	return b;
> >  }
> >  
> > @@ -849,22 +867,18 @@ static void read_endio(struct bio *bio, 
> >  static void *new_read(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
> >  		      struct dm_buffer **bp)
> >  {
> > -	int need_submit;
> >  	struct dm_buffer *b;
> >  
> >  	dm_bufio_lock(c);
> > -	b = __bufio_new(c, block, nf, bp, &need_submit);
> > +	b = __bufio_new(c, block, nf, bp);
> >  	dm_bufio_unlock(c);
> >  
> > 	if (!b || IS_ERR(b))
> >  		return b;
> >  	else {
> > -		if (need_submit)
> > -			submit_io(b, READ, b->block, read_endio);
> > -
> >  		wait_on_bit(&b->state, B_READING,
> >  			    do_io_schedule, TASK_UNINTERRUPTIBLE);
> 
> 
> This change means submit_io(), which can block, will be called with
> the client lock held.  I don't see this as an improvement.  NACK.

dm-bufio is designed with the possibility that submit_io is called inside 
the lock. There are other places that do it too. But it's true that it 
wasn't called in the lock before.

I think the best thing would be to delete those functions and use the code 
that was there before.

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 13:47   ` Mikulas Patocka
@ 2011-10-17 13:57     ` Joe Thornber
  0 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 13:57 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Mon, Oct 17, 2011 at 09:47:05AM -0400, Mikulas Patocka wrote:
> I think the best thing would be to delete those functions and use the code
> that was there before.

I know you do.  However, Alasdair made it clear that he was not
willing to push the code as it was and we don't have time to argue.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 10:08 ` Joe Thornber
  2011-10-17 12:41   ` dm-bufio Mike Snitzer
@ 2011-10-17 14:04   ` Mikulas Patocka
  2011-10-17 14:15     ` Joe Thornber
  1 sibling, 1 reply; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-17 14:04 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon



On Mon, 17 Oct 2011, Joe Thornber wrote:

> On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> > Hi
> > 
> > This is a patch for dm-bufio.
> > 
> > Changes:
> > * fixed a bug in i/o submission, introduced by someone who changed the 
> > code
> 
> Could you point me at the specific part of this patch that does this
> please?

this:
                        BUG_ON(b->c->block_size <= PAGE_SIZE);
                        use_dmio(b, rw, block, end_io);
+                       return;


> > * simplified some constructs
> > * more likely/unlikely hints
> 
> They clutter the code, and have been used without discrimination.  What
> is the point of using it on a slow path?

I think I added them only to the possible hot paths.

> > * dm-bufio.h moved from drivers/md to include/linux
> 
> Who outside dm do you expect to use this?

I don't know, Alasdair said that he wants it there (like 
include/linux/dm-io.h for example). BTW dm-bufio has nothing to do with 
device mapper actually, so it can be used by any code.

> > * put cond_resched() to loops (it was there originally and then someone 
> > deleted it)
> 
> If you're going to use cond_resched() at least do so a little more
> intelligently than putting it in _every_ loop.  For instance you call it on
> every iteration of a sweep through the hash table.  The call to
> cond_resched will take more time than the loop body.  At least make a
> change so it's only done every n'th iteration.

I think it would be better to use
#ifndef CONFIG_PREEMPT
if (need_resched()) cond_resched();
#endif

need_resched() is inlined and translates to a single condition.

I don't know why Linux doesn't provide a macro for it; this would be 
useful far beyond dm code.

Mikulas

> Can I also point out that I asked you to make a lot of these changes
> over a year ago.  You've only got yourself to blame if 'someone' does
> it for you.
> 
> - Someone

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 11:11 ` Joe Thornber
@ 2011-10-17 14:05   ` Mikulas Patocka
  2011-10-17 14:09     ` Joe Thornber
  0 siblings, 1 reply; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-17 14:05 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon



On Mon, 17 Oct 2011, Joe Thornber wrote:

> On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> @@ -493,8 +500,10 @@ static void use_inline_bio(struct dm_buf
>  static void submit_io(struct dm_buffer *b, int rw, sector_t block,
>                       bio_end_io_t *end_io)
>  {
> -       if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
> -           b->data_mode != DATA_MODE_VMALLOC)
> +       if (rw == WRITE && b->c->write_callback)
> +               b->c->write_callback(b);
>         if (likely(b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE) &&
>             likely(b->data_mode != DATA_MODE_VMALLOC))
>                 use_inline_bio(b, rw, block, end_io);
>         else
>                 use_dmio(b, rw, block, end_io);
> @@ -550,8 +559,6 @@ static void __write_dirty_buffer(struct
>         clear_bit(B_DIRTY, &b->state);
>         wait_on_bit_lock(&b->state, B_WRITING,
>                          do_io_schedule, TASK_UNINTERRUPTIBLE);
> -       if (b->c->write_callback)
> -               b->c->write_callback(b);
>         submit_io(b, WRITE, b->block, write_endio);
>  }
> 
> 
> This doesn't seem an improvement.  Except ... it changes the behaviour
> of dm_bufio_release_move().  So was there a preexisting bug in
> dm_bufio_release_move() that you're trying to fix with this patch?

The actual reason was to do this callback in dm_bufio_release_move() too 
--- just for consistency. (the user of dm_bufio_release_move() doesn't use 
write_callback anyway).

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 14:05   ` Mikulas Patocka
@ 2011-10-17 14:09     ` Joe Thornber
  2011-10-17 16:22       ` Mikulas Patocka
  0 siblings, 1 reply; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 14:09 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Mon, Oct 17, 2011 at 10:05:26AM -0400, Mikulas Patocka wrote:
> 
> 
> On Mon, 17 Oct 2011, Joe Thornber wrote:
> 
> > On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> > @@ -493,8 +500,10 @@ static void use_inline_bio(struct dm_buf
> >  static void submit_io(struct dm_buffer *b, int rw, sector_t block,
> >                       bio_end_io_t *end_io)
> >  {
> > -       if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
> > -           b->data_mode != DATA_MODE_VMALLOC)
> > +       if (rw == WRITE && b->c->write_callback)
> > +               b->c->write_callback(b);
> >         if (likely(b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE) &&
> >             likely(b->data_mode != DATA_MODE_VMALLOC))
> >                 use_inline_bio(b, rw, block, end_io);
> >         else
> >                 use_dmio(b, rw, block, end_io);
> > @@ -550,8 +559,6 @@ static void __write_dirty_buffer(struct
> >         clear_bit(B_DIRTY, &b->state);
> >         wait_on_bit_lock(&b->state, B_WRITING,
> >                          do_io_schedule, TASK_UNINTERRUPTIBLE);
> > -       if (b->c->write_callback)
> > -               b->c->write_callback(b);
> >         submit_io(b, WRITE, b->block, write_endio);
> >  }
> > 
> > 
> > This doesn't seem an improvement.  Except ... it changes the behaviour
> > of dm_bufio_release_move().  So was there a preexisting bug in
> > dm_bufio_release_move() that you're trying to fix with this patch?
> 
> The actual reason was to do this callback in dm_bufio_release_move() too 
> --- just for consistency. (the user of dm_bufio_release_move() doesn't use 
> write_callback anyway).

thinp uses dm_bufio_release_move() and write_callback.  So yes, this
is a bug fix.  I thought so and merged.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 14:04   ` [PATCH] dm-bufio Mikulas Patocka
@ 2011-10-17 14:15     ` Joe Thornber
  0 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 14:15 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Mon, Oct 17, 2011 at 10:04:16AM -0400, Mikulas Patocka wrote:
> > If you're going to use cond_resched() at least do so a little more
> > intelligently than putting it in _every_ loop.  For instance you call it on
> > every iteration of a sweep through the hash table.  The call to
> > cond_resched will take more time than the loop body.  At least make a
> > change so it's only done every n'th iteration.
> 
> I think it would be better to use
> #ifndef CONFIG_PREEMPT
> if (need_resched()) cond_resched();
> #endif
> 
> need_resched() is inlined and translates to a single condition.

Yep, that would be fine.

> I don't know why Linux doesn't provide a macro for it, this would be 
> useful far beyond dm code.

Agreed, I was very surprised that cond_resched() expands out to a
function call rather than a test + fn call.

Get a patch together and I'll merge.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 14:09     ` Joe Thornber
@ 2011-10-17 16:22       ` Mikulas Patocka
  2011-10-17 16:41         ` Joe Thornber
  2011-10-17 18:30         ` Joe Thornber
  0 siblings, 2 replies; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-17 16:22 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon



On Mon, 17 Oct 2011, Joe Thornber wrote:

> On Mon, Oct 17, 2011 at 10:05:26AM -0400, Mikulas Patocka wrote:
> > 
> > 
> > On Mon, 17 Oct 2011, Joe Thornber wrote:
> > 
> > > On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> > > @@ -493,8 +500,10 @@ static void use_inline_bio(struct dm_buf
> > >  static void submit_io(struct dm_buffer *b, int rw, sector_t block,
> > >                       bio_end_io_t *end_io)
> > >  {
> > > -       if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
> > > -           b->data_mode != DATA_MODE_VMALLOC)
> > > +       if (rw == WRITE && b->c->write_callback)
> > > +               b->c->write_callback(b);
> > >         if (likely(b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE) &&
> > >             likely(b->data_mode != DATA_MODE_VMALLOC))
> > >                 use_inline_bio(b, rw, block, end_io);
> > >         else
> > >                 use_dmio(b, rw, block, end_io);
> > > @@ -550,8 +559,6 @@ static void __write_dirty_buffer(struct
> > >         clear_bit(B_DIRTY, &b->state);
> > >         wait_on_bit_lock(&b->state, B_WRITING,
> > >                          do_io_schedule, TASK_UNINTERRUPTIBLE);
> > > -       if (b->c->write_callback)
> > > -               b->c->write_callback(b);
> > >         submit_io(b, WRITE, b->block, write_endio);
> > >  }
> > > 
> > > 
> > > This doesn't seem an improvement.  Except ... it changes the behaviour
> > > of dm_bufio_release_move().  So was there a preexisting bug in
> > > dm_bufio_release_move() that you're trying to fix with this patch?
> > 
> > The actual reason was to do this callback in dm_bufio_release_move() too 
> > --- just for consistency. (the user of dm_bufio_release_move() doesn't use 
> > write_callback anyway).
> 
> thinp uses dm_bufio_release_move() and write_callback.  So yes, this
> is a bug fix.  I thought so and merged.

BTW. it still uses the old block number in "prepare_for_write" callback. 
Do you use this block number somewhere? Should we link the buffer to the 
new block before the call and then link it back?

Mikulas

> - Joe
> 

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 11:29 ` Joe Thornber
@ 2011-10-17 16:24   ` Mikulas Patocka
  2011-10-17 16:43     ` Joe Thornber
  0 siblings, 1 reply; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-17 16:24 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Mon, 17 Oct 2011, Joe Thornber wrote:

> On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> > Hi
> > 
> > This is a patch for dm-bufio.
> 
> I've merged all I'm going to at this point and pushed to thin-dev.

I think Alasdair currently holds the master version of this code. Or is it 
you? If we have three versions of the same code and don't know which one is 
the master, then there is a problem.

Mikulas

> If you put together a patch for the cond_resched stuff I'll take that;
> providing you don't call it in a tight loop like here:
> 
> > @@ -758,8 +772,9 @@ static struct dm_buffer *__find(struct d
> >  	struct dm_buffer *b;
> >  	struct hlist_node *hn;
> >  	hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], hash_list) {
> > -		if (b->block == block)
> > +		if (likely(b->block == block))
> >  			return b;
> > +		dm_bufio_cond_resched();
> >  	}
> 
> 
> I'm not really interested in the likely/unlikely annotations.
> 
> - Joe
> 

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 16:22       ` Mikulas Patocka
@ 2011-10-17 16:41         ` Joe Thornber
  2011-10-17 18:30         ` Joe Thornber
  1 sibling, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 16:41 UTC (permalink / raw)
  To: device-mapper development; +Cc: Mike Snitzer, Alasdair G. Kergon

On Mon, Oct 17, 2011 at 12:22:13PM -0400, Mikulas Patocka wrote:
> > thinp uses dm_bufio_release_move() and write_callback.  So yes, this
> > is a bug fix.  I thought so and merged.
> 
> BTW. it still uses the old block number in "prepare_for_write" callback. 
> Do you use this block number somewhere? Should we link the buffer to the 
> new block before the call and the link it back?

Good catch, let me look into it.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 16:24   ` Mikulas Patocka
@ 2011-10-17 16:43     ` Joe Thornber
  0 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 16:43 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Mon, Oct 17, 2011 at 12:24:00PM -0400, Mikulas Patocka wrote:
> On Mon, 17 Oct 2011, Joe Thornber wrote:
> 
> > On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> > > Hi
> > > 
> > > This is a patch for dm-bufio.
> > 
> > I've merged all I'm going to at this point and pushed to thin-dev.
> 
> I think Alasdair currently holds the master version of this code. Or is it 
> you? If we had three versions of the same code and don't know which one is 
> the master, then there is a problem.

I consider my tree to be the master of thin-provisioning, and will be
very annoyed if anything gets pushed to linux-next that hasn't been
sent to me first.  dm-bufio is yours to maintain in the long term -
I'm just taking an interest because it's blocking acceptance of my
code.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 16:22       ` Mikulas Patocka
  2011-10-17 16:41         ` Joe Thornber
@ 2011-10-17 18:30         ` Joe Thornber
  1 sibling, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 18:30 UTC (permalink / raw)
  To: device-mapper development; +Cc: Mike Snitzer, Alasdair G. Kergon

On Mon, Oct 17, 2011 at 12:22:13PM -0400, Mikulas Patocka wrote:
> > thinp uses dm_bufio_release_move() and write_callback.  So yes, this
> > is a bug fix.  I thought so and merged.
> 
> BTW. it still uses the old block number in "prepare_for_write" callback. 
> Do you use this block number somewhere? Should we link the buffer to the 
> new block before the call and the link it back?

I double checked this; it's fine.  I use release_move as part of the
shadow operation in the transaction manager.  As such we know a write
lock is going to be grabbed on it straight away (which will dirty and
prompt a rewriting of the block nr).  Also in the event of a crash
before the next commit, the new block won't appear in the metadata at
all.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-24 18:51             ` dm-bufio Will Drewry
@ 2012-03-27 10:20               ` Kasatkin, Dmitry
  0 siblings, 0 replies; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-27 10:20 UTC (permalink / raw)
  To: Will Drewry; +Cc: device-mapper development, mpatocka, Zdenek Kabelac

On Sat, Mar 24, 2012 at 8:51 PM, Will Drewry <redpig@dataspill.org> wrote:
> On Fri, Mar 23, 2012 at 8:07 PM, Mikulas Patocka <mpatocka@redhat.com> wrote:
>>
>>
>> On Sat, 24 Mar 2012, Kasatkin, Dmitry wrote:
>>
>>> Hi,
>>>
>>> Thanks for clarification.
>>> Indeed everything works just with dm_bufio_write_dirty_buffers().
>>> Reboot notifier is to issue the flush only..
>>> As I understand, dm-bufio will do the flush but currently once per 10 seconds.
>>>
>>> if data on the block device and metadata on other block device get out
>>> of sync, what you can do then?
>>> how journal helps then?
>>>
>>> - Dmitry
>>
>> It depends what you're trying to do.
>>
>> If you're trying to do something like "dm-verity", but with a possibility
>> to write to the device, there are several possibilities:
>>
>> * keep two checksums per 512-byte sector, the old checksum and the new
>> checksum. If you update the block, you update the new checksum, sync the
>> metadata device and then write to the data device (obviously you need to
>> batch this update-sync-write for many blocks write concurrently to get
>> decent performance). When you verify block, you allow either checksum to
>> match. When you sync caches on the data device, you must forget all the
>> old checksums.
>>
>> * use journaling, write data block and its checksum to a journal. If the
>> computer crashes, you just replay the journal (so you don't care what data
>> was present at that place, you overwrite it with data from the journal).
>> The downside is that this doubles required i/o throughput, you should have
>> journal and data on different devices.
>>
>> * do nothing and rebuild the checksums in case of crash. It is simplest,
>> but it doesn't protect from data damages that happen due to the crash (for
>> example, some SSDs corrupt its metadata on power failure and you can't
>> detect this if you rebuild checksums after a power failure).
>>
>>> Yes.. I am aware of dm-verity target.
>>> It suites well for read-only cases.
>>> It is questionable how tree-based approach will work with read-write.
>>> Each single update will cause whole tree recalculation.
>>
>> A write would recalculate hashes only in the branch from tree bottom to
>> tree top. The obvious downside is that there is no protection from crash.
>
> It also depends on how you plan to assure the integrity of the data:
> Device-based symmetric key, asymmetric key, etc and the costs
> associated.  Local updates make integrity tricky -- will the device
> update itself or will signed updates be supplied, do they need to be
> online, does only a subsection need to be online, etc.
>
> It's likely that the tree updates won't be too expensive compared to
> the crypto and you could attempt to optimize tree updates along a hot
> path if needed (by breaking out hot subdirs to a separate targets) and
> explore other tricks for getting transaction oriented behavior (two
> swapping metadata devices for atomic tree updates, etc).  dm-verity
> was never locked into being a read-only target, but the lack of need
> to support online updates means the code and required changes don't
> exist.
>
> I'm sure any of us involved in dm-verity would be happy to discuss how
> it might be used for your purposes (or if it is really a bad fit),
> etc.
>
> cheers!
> will

Hello,

dm-verity looks extremely nice for read-only targets.
Just one root hash protects whole block device.
But I am interested in writable use case.
It would be nice to understand how it can be addressed as well...

Thanks,
Dmitry

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-24  1:07           ` dm-bufio Mikulas Patocka
  2012-03-24 18:51             ` dm-bufio Will Drewry
@ 2012-03-27  9:56             ` Kasatkin, Dmitry
  1 sibling, 0 replies; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-27  9:56 UTC (permalink / raw)
  To: device-mapper development; +Cc: Zdenek Kabelac

On Sat, Mar 24, 2012 at 3:07 AM, Mikulas Patocka <mpatocka@redhat.com> wrote:
>
>
> On Sat, 24 Mar 2012, Kasatkin, Dmitry wrote:
>
>> Hi,
>>
>> Thanks for clarification.
>> Indeed everything works just with dm_bufio_write_dirty_buffers().
>> Reboot notifier is to issue the flush only..
>> As I understand, dm-bufio will do the flush but currently once per 10 seconds.
>>
>> if data on the block device and metadata on other block device get out
>> of sync, what you can do then?
>> how journal helps then?
>>
>> - Dmitry
>
> It depends what you're trying to do.
>
> If you're trying to do something like "dm-verity", but with a possibility
> to write to the device, there are several possibilities:
>
> * keep two checksums per 512-byte sector, the old checksum and the new
> checksum. If you update the block, you update the new checksum, sync the
> metadata device and then write to the data device (obviously you need to
> batch this update-sync-write for many blocks write concurrently to get
> decent performance). When you verify block, you allow either checksum to
> match. When you sync caches on the data device, you must forget all the
> old checksums.
>

Right.. It requires double the space and more IO.
But it will certainly be more resilient to failures.
What, though, if the data block is corrupted during a write due to power or
other failures?
In such a case, obviously, neither checksum will match...

> * use journaling, write data block and its checksum to a journal. If the
> computer crashes, you just replay the journal (so you don't care what data
> was present at that place, you overwrite it with data from the journal).
> The downside is that this doubles required i/o throughput, you should have
> journal and data on different devices.
>

That looks definitely more reliable.


> * do nothing and rebuild the checksums in case of crash. It is simplest,
> but it doesn't protect from data damages that happen due to the crash (for
> example, some SSDs corrupt its metadata on power failure and you can't
> detect this if you rebuild checksums after a power failure).
>

easy and nice :)

>> Yes.. I am aware of dm-verity target.
>> It suites well for read-only cases.
>> It is questionable how tree-based approach will work with read-write.
>> Each single update will cause whole tree recalculation.
>
> A write would recalculate hashes only in the branch from tree bottom to
> tree top. The obvious downside is that there is no protection from crash.
>

Yes.. I noticed.

>
> BTW. regarding that reboot notifier with
> "dm_bufio_write_dirty_buffers(d->bufio)" ... there could be another
> problem ... what if other reboot notifier (maybe for a completely
> different driver) writes to the device after
> "dm_bufio_write_dirty_buffers(d->bufio)" was performed?
>

The "target" owns and writes to the device...
What other driver would do it?
Also, the rootfs is re-mounted read-only before rebooting.
It is actually not about data device sync, but "hash device" sync.

> - would it be possible to install your notifier again?
>
> - or turn into a synchronous updates? --- i.e. set a flag in your reboot
> notifier and if the flag is on, call
> "dm_bufio_write_dirty_buffers(d->bufio)" after every write.
>

I see the idea.

> Mikulas
>

Thanks,

Dmitry

> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-24  1:07           ` dm-bufio Mikulas Patocka
@ 2012-03-24 18:51             ` Will Drewry
  2012-03-27 10:20               ` dm-bufio Kasatkin, Dmitry
  2012-03-27  9:56             ` dm-bufio Kasatkin, Dmitry
  1 sibling, 1 reply; 51+ messages in thread
From: Will Drewry @ 2012-03-24 18:51 UTC (permalink / raw)
  To: device-mapper development, Kasatkin, Dmitry; +Cc: mpatocka, Zdenek Kabelac

On Fri, Mar 23, 2012 at 8:07 PM, Mikulas Patocka <mpatocka@redhat.com> wrote:
>
>
> On Sat, 24 Mar 2012, Kasatkin, Dmitry wrote:
>
>> Hi,
>>
>> Thanks for clarification.
>> Indeed everything works just with dm_bufio_write_dirty_buffers().
>> Reboot notifier is to issue the flush only..
>> As I understand, dm-bufio will do the flush but currently once per 10 seconds.
>>
>> if data on the block device and metadata on other block device get out
>> of sync, what you can do then?
>> how journal helps then?
>>
>> - Dmitry
>
> It depends what you're trying to do.
>
> If you're trying to do something like "dm-verity", but with a possibility
> to write to the device, there are several possibilities:
>
> * keep two checksums per 512-byte sector, the old checksum and the new
> checksum. If you update the block, you update the new checksum, sync the
> metadata device and then write to the data device (obviously you need to
> batch this update-sync-write for many blocks write concurrently to get
> decent performance). When you verify block, you allow either checksum to
> match. When you sync caches on the data device, you must forget all the
> old checksums.
>
> * use journaling, write data block and its checksum to a journal. If the
> computer crashes, you just replay the journal (so you don't care what data
> was present at that place, you overwrite it with data from the journal).
> The downside is that this doubles required i/o throughput, you should have
> journal and data on different devices.
>
> * do nothing and rebuild the checksums in case of crash. It is simplest,
> but it doesn't protect from data damages that happen due to the crash (for
> example, some SSDs corrupt its metadata on power failure and you can't
> detect this if you rebuild checksums after a power failure).
>
>> Yes.. I am aware of dm-verity target.
>> It suites well for read-only cases.
>> It is questionable how tree-based approach will work with read-write.
>> Each single update will cause whole tree recalculation.
>
> A write would recalculate hashes only in the branch from tree bottom to
> tree top. The obvious downside is that there is no protection from crash.

It also depends on how you plan to assure the integrity of the data:
Device-based symmetric key, asymmetric key, etc and the costs
associated.  Local updates make integrity tricky -- will the device
update itself or will signed updates be supplied, do they need to be
online, does only a subsection need to be online, etc.

It's likely that the tree updates won't be too expensive compared to
the crypto and you could attempt to optimize tree updates along a hot
path if needed (by breaking out hot subdirs to a separate targets) and
explore other tricks for getting transaction oriented behavior (two
swapping metadata devices for atomic tree updates, etc).  dm-verity
was never locked into being a read-only target, but the lack of need
to support online updates means the code and required changes don't
exist.

I'm sure any of us involved in dm-verity would be happy to discuss how
it might be used for your purposes (or if it is really a bad fit),
etc.

cheers!
will

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 23:58         ` dm-bufio Kasatkin, Dmitry
@ 2012-03-24  1:07           ` Mikulas Patocka
  2012-03-24 18:51             ` dm-bufio Will Drewry
  2012-03-27  9:56             ` dm-bufio Kasatkin, Dmitry
  0 siblings, 2 replies; 51+ messages in thread
From: Mikulas Patocka @ 2012-03-24  1:07 UTC (permalink / raw)
  To: Kasatkin, Dmitry; +Cc: device-mapper development, Zdenek Kabelac



On Sat, 24 Mar 2012, Kasatkin, Dmitry wrote:

> Hi,
> 
> Thanks for clarification.
> Indeed everything works just with dm_bufio_write_dirty_buffers().
> Reboot notifier is to issue the flush only..
> As I understand, dm-bufio will do the flush but currently once per 10 seconds.
> 
> if data on the block device and metadata on other block device get out
> of sync, what you can do then?
> how journal helps then?
> 
> - Dmitry

It depends what you're trying to do.

If you're trying to do something like "dm-verity", but with a possibility 
to write to the device, there are several possibilities:

* keep two checksums per 512-byte sector, the old checksum and the new 
checksum. If you update the block, you update the new checksum, sync the 
metadata device and then write to the data device (obviously you need to 
batch this update-sync-write for many blocks written concurrently to get 
decent performance). When you verify block, you allow either checksum to 
match. When you sync caches on the data device, you must forget all the 
old checksums.

* use journaling, write data block and its checksum to a journal. If the 
computer crashes, you just replay the journal (so you don't care what data 
was present at that place, you overwrite it with data from the journal). 
The downside is that this doubles required i/o throughput, you should have 
journal and data on different devices.

* do nothing and rebuild the checksums in case of crash. It is simplest, 
but it doesn't protect from data damages that happen due to the crash (for 
example, some SSDs corrupt their metadata on power failure and you can't 
detect this if you rebuild checksums after a power failure).

> Yes.. I am aware of dm-verity target.
> It suites well for read-only cases.
> It is questionable how tree-based approach will work with read-write.
> Each single update will cause whole tree recalculation.

A write would recalculate hashes only in the branch from tree bottom to 
tree top. The obvious downside is that there is no protection from crash.


BTW. regarding that reboot notifier with 
"dm_bufio_write_dirty_buffers(d->bufio)" ... there could be another 
problem ... what if other reboot notifier (maybe for a completely 
different driver) writes to the device after 
"dm_bufio_write_dirty_buffers(d->bufio)" was performed?

- would it be possible to install your notifier again?

- or turn into a synchronous updates? --- i.e. set a flag in your reboot 
notifier and if the flag is on, call 
"dm_bufio_write_dirty_buffers(d->bufio)" after every write.

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 21:32               ` dm-bufio Mike Snitzer
@ 2012-03-24  0:04                 ` Kasatkin, Dmitry
  0 siblings, 0 replies; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-24  0:04 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: device-mapper development, mpatocka, Zdenek Kabelac

On Fri, Mar 23, 2012 at 11:32 PM, Mike Snitzer <snitzer@redhat.com> wrote:
> On Fri, Mar 23 2012 at 11:17am -0400,
> Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:
>
>> On Fri, Mar 23, 2012 at 4:32 PM, Mike Snitzer <snitzer@redhat.com> wrote:
>> > As an aside, just curious: what does your target do?
>>
>> I have experimented with integrity protection.
>
> Interesting.  Are you aware that a new integrity target has been
developed and is approaching upstream inclusion?  It is called
> dm-verity.
>
> Mikulas posted the most recent version here:
> http://www.redhat.com/archives/dm-devel/2012-March/msg00133.html
>
> And here is the userspace setup code:
> http://www.redhat.com/archives/dm-devel/2012-March/msg00134.html
>
> The original docs from Google's initial dm-verity are mostly applicable
> (but need to be refreshed and included with the new version):
> http://www.redhat.com/archives/dm-devel/2012-February/msg00119.html
>
> dm-verity already has 2 formats, a 3rd could be added if you found the
> existing formats somehow lacking, see:
> http://www.redhat.com/archives/dm-devel/2012-March/msg00143.html
>
> Mike

Hi,

Yes.. I am aware of dm-verity target.
It suits read-only cases well.
It is questionable how a tree-based approach will work with read-write.
Every single update will cause recalculation of the whole tree.

- Dmitry

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 16:22       ` dm-bufio Mikulas Patocka
@ 2012-03-23 23:58         ` Kasatkin, Dmitry
  2012-03-24  1:07           ` dm-bufio Mikulas Patocka
  0 siblings, 1 reply; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-23 23:58 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: device-mapper development, Zdenek Kabelac

On Fri, Mar 23, 2012 at 6:22 PM, Mikulas Patocka <mpatocka@redhat.com> wrote:
>
>
> On Fri, 23 Mar 2012, Kasatkin, Dmitry wrote:
>
>> When using dm-bufio and dm-io in general, how to ensure that all dirty
>
> Regarding dm-io, you don't have to flush dirty buffers because dm-io does
> no caching. You only need to flush hardware disk cache with
> blkdev_issue_flush.
>
>> At the moment, I have reboot notifier which does the following
>>
>>       dm_bufio_write_dirty_buffers(d->bufio);
>>       sync_blockdev(d->dev->bdev);
>>       blkdev_issue_flush(d->dev->bdev, GFP_KERNEL, NULL);
>>
>> without first line on the next boot I got corrupted/not updated blocks.
>> and I am not sure if I need last 2 lines...
>
> You can drop sync_blockdev(d->dev->bdev) --- sync_blockdev flushes kernel
> buffer cache and dm-bufio doesn't use the kernel buffer cache (you only
> need sync_blockdev if you use kernel buffer cache on this device for
> something else). If you drop sync_blockdev, you can also drop
> blkdev_issue_flush, because dm_bufio_write_dirty_buffers already issues
> the flush request.
>
> BTW. how are you going to deal with kernel crashes or power faults?
>
> It is much better to use journaling, phase tree, crash counts or other
> method to maintain metadata integrity insted of reboot notifier --- these
> methods maintain integrity even in case of unexpected crash.
>
> Mikulas
>

Hi,

Thanks for the clarification.
Indeed, everything works with just dm_bufio_write_dirty_buffers().
The reboot notifier is there only to issue the flush..
As I understand it, dm-bufio will do the flush, but currently only once per 10 seconds.

If data on the block device and metadata on another block device get out
of sync, what can you do then?
How does a journal help then?

- Dmitry

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 15:17             ` dm-bufio Kasatkin, Dmitry
@ 2012-03-23 21:32               ` Mike Snitzer
  2012-03-24  0:04                 ` dm-bufio Kasatkin, Dmitry
  0 siblings, 1 reply; 51+ messages in thread
From: Mike Snitzer @ 2012-03-23 21:32 UTC (permalink / raw)
  To: dmitry.kasatkin; +Cc: device-mapper development, mpatocka, Zdenek Kabelac

On Fri, Mar 23 2012 at 11:17am -0400,
Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:

> On Fri, Mar 23, 2012 at 4:32 PM, Mike Snitzer <snitzer@redhat.com> wrote:
> > As an aside, just curious: what does your target do?
> 
> I have experimented with integrity protection.

Interesting.  Are you aware that a new integrity target has been
developed and is approaching upstream inclussion?  It is called
dm-verity.

Mikulas posted the most recent version here:
http://www.redhat.com/archives/dm-devel/2012-March/msg00133.html

And here is the userspace setup code:
http://www.redhat.com/archives/dm-devel/2012-March/msg00134.html

The original docs from Google's initial dm-verity are mostly applicable
(but need to be refreshed and included with the new version):
http://www.redhat.com/archives/dm-devel/2012-February/msg00119.html

dm-verity already has 2 formats, a 3rd could be added if you found the
existing formats somehow lacking, see:
http://www.redhat.com/archives/dm-devel/2012-March/msg00143.html

Mike

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 14:12     ` dm-bufio Kasatkin, Dmitry
  2012-03-23 14:21       ` dm-bufio Mike Snitzer
@ 2012-03-23 16:22       ` Mikulas Patocka
  2012-03-23 23:58         ` dm-bufio Kasatkin, Dmitry
  1 sibling, 1 reply; 51+ messages in thread
From: Mikulas Patocka @ 2012-03-23 16:22 UTC (permalink / raw)
  To: Kasatkin, Dmitry; +Cc: device-mapper development, Zdenek Kabelac



On Fri, 23 Mar 2012, Kasatkin, Dmitry wrote:

> When using dm-bufio and dm-io in general, how to ensure that all dirty

Regarding dm-io, you don't have to flush dirty buffers because dm-io does 
no caching. You only need to flush hardware disk cache with 
blkdev_issue_flush.

> At the moment, I have reboot notifier which does the following
> 
> 	dm_bufio_write_dirty_buffers(d->bufio);
> 	sync_blockdev(d->dev->bdev);
> 	blkdev_issue_flush(d->dev->bdev, GFP_KERNEL, NULL);
> 
> without first line on the next boot I got corrupted/not updated blocks.
> and I am not sure if I need last 2 lines...

You can drop sync_blockdev(d->dev->bdev) --- sync_blockdev flushes kernel 
buffer cache and dm-bufio doesn't use the kernel buffer cache (you only 
need sync_blockdev if you use kernel buffer cache on this device for 
something else). If you drop sync_blockdev, you can also drop 
blkdev_issue_flush, because dm_bufio_write_dirty_buffers already issues 
the flush request.

BTW. how are you going to deal with kernel crashes or power faults?

It is much better to use journaling, phase tree, crash counts or other 
method to maintain metadata integrity instead of a reboot notifier --- these 
methods maintain integrity even in case of unexpected crash.

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 14:32           ` dm-bufio Mike Snitzer
@ 2012-03-23 15:17             ` Kasatkin, Dmitry
  2012-03-23 21:32               ` dm-bufio Mike Snitzer
  0 siblings, 1 reply; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-23 15:17 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: device-mapper development, mpatocka, Zdenek Kabelac

On Fri, Mar 23, 2012 at 4:32 PM, Mike Snitzer <snitzer@redhat.com> wrote:
> On Fri, Mar 23 2012 at 10:29am -0400,
> Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:
>
>> On Fri, Mar 23, 2012 at 4:21 PM, Mike Snitzer <snitzer@redhat.com> wrote:
>> > On Fri, Mar 23 2012 at 10:12am -0400,
>> > Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:
>> >
>> >> On Fri, Mar 23, 2012 at 1:26 PM, Kasatkin, Dmitry
>> >> <dmitry.kasatkin@intel.com> wrote:
>> >> > On Fri, Mar 23, 2012 at 1:10 PM, Zdenek Kabelac <zkabelac@redhat.com> wrote:
>> >> >> Dne 23.3.2012 12:01, Kasatkin, Dmitry napsal(a):
>> >> >>> Hello,
>> >> >>>
>> >> >>> When using dm-bufio and dm-io in general, how to ensure that all dirty
>> >> >>> buffers are written to the storage when machine reboots?
>> >> >>> suspend hooks could be used, but they are not called on reboot, only
>> >> >>> when suspending/removing the target...
>> >> >>>
>> >> >>
>> >> >> You mean you reboot without running  'sync' command?
>> >> >>
>> >> >> And yes - on reboot you should properly unmount devices - so you should
>> >> >> see removal of target on your shutdown sequence -  I believe Fedora currently
>> >> >> tries to support switch to some shutdown ramdisk, so all filesystem and
>> >> >> devices might be properly unmounted and destroyed.
>> >> >>
>> >> >
>> >> > Hello,
>> >> >
>> >> > Thanks for response.
>> >> > I use bufio to store some data on block device.
>> >> > It is not mounted in anyway. My target just use it to load/store data.
>> >> > When machine reboots, I want to be sure that bufio written all dirty buffers...
>> >> >
>> >> > - Dmitry
>> >> >
>> >>
>> >> At the moment, I have reboot notifier which does the following
>> >>
>> >>       dm_bufio_write_dirty_buffers(d->bufio);
>> >>       sync_blockdev(d->dev->bdev);
>> >>       blkdev_issue_flush(d->dev->bdev, GFP_KERNEL, NULL);
>> >>
>> >> without first line on the next boot I got corrupted/not updated blocks.
>> >> and I am not sure if I need last 2 lines...
>> >
>> > Are you cleanly removing the target from the kernel before reboot
>> > (e.g. dmsetup remove devname)?
>> >
>> > As long as your target's .dtr is making sure to flush all outstanding IO
>> > (like your reboot notifier does) you should be fine.
>> >
>>
>> The target contains rootfs... On reboot, it is remounted read-only.
>> I cannot remove it...
>>
>> Sometime ago I had "message" operation "sync", to sync backing devices.
>> But reboot notifier looks nice... It is automatically called.
>
> OK.
>
> As an aside, just curious: what does your target do?

I have experimented with integrity protection.

Thanks.

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 14:29         ` dm-bufio Kasatkin, Dmitry
@ 2012-03-23 14:32           ` Mike Snitzer
  2012-03-23 15:17             ` dm-bufio Kasatkin, Dmitry
  0 siblings, 1 reply; 51+ messages in thread
From: Mike Snitzer @ 2012-03-23 14:32 UTC (permalink / raw)
  To: Kasatkin, Dmitry; +Cc: device-mapper development, mpatocka, Zdenek Kabelac

On Fri, Mar 23 2012 at 10:29am -0400,
Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:

> On Fri, Mar 23, 2012 at 4:21 PM, Mike Snitzer <snitzer@redhat.com> wrote:
> > On Fri, Mar 23 2012 at 10:12am -0400,
> > Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:
> >
> >> On Fri, Mar 23, 2012 at 1:26 PM, Kasatkin, Dmitry
> >> <dmitry.kasatkin@intel.com> wrote:
> >> > On Fri, Mar 23, 2012 at 1:10 PM, Zdenek Kabelac <zkabelac@redhat.com> wrote:
> >> >> Dne 23.3.2012 12:01, Kasatkin, Dmitry napsal(a):
> >> >>> Hello,
> >> >>>
> >> >>> When using dm-bufio and dm-io in general, how to ensure that all dirty
> >> >>> buffers are written to the storage when machine reboots?
> >> >>> suspend hooks could be used, but they are not called on reboot, only
> >> >>> when suspending/removing the target...
> >> >>>
> >> >>
> >> >> You mean you reboot without running  'sync' command?
> >> >>
> >> >> And yes - on reboot you should properly unmount devices - so you should
> >> >> see removal of target on your shutdown sequence -  I believe Fedora currently
> >> >> tries to support switch to some shutdown ramdisk, so all filesystem and
> >> >> devices might be properly unmounted and destroyed.
> >> >>
> >> >
> >> > Hello,
> >> >
> >> > Thanks for response.
> >> > I use bufio to store some data on block device.
> >> > It is not mounted in anyway. My target just use it to load/store data.
> >> > When machine reboots, I want to be sure that bufio written all dirty buffers...
> >> >
> >> > - Dmitry
> >> >
> >>
> >> At the moment, I have reboot notifier which does the following
> >>
> >>       dm_bufio_write_dirty_buffers(d->bufio);
> >>       sync_blockdev(d->dev->bdev);
> >>       blkdev_issue_flush(d->dev->bdev, GFP_KERNEL, NULL);
> >>
> >> without first line on the next boot I got corrupted/not updated blocks.
> >> and I am not sure if I need last 2 lines...
> >
> > Are you cleanly removing the target from the kernel before reboot
> > (e.g. dmsetup remove devname)?
> >
> > As long as your target's .dtr is making sure to flush all outstanding IO
> > (like your reboot notifier does) you should be fine.
> >
> 
> The target contains rootfs... On reboot, it is remounted read-only.
> I cannot remove it...
> 
> Sometime ago I had "message" operation "sync", to sync backing devices.
> But reboot notifier looks nice... It is automatically called.

OK.

As an aside, just curious: what does your target do?

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 14:21       ` dm-bufio Mike Snitzer
@ 2012-03-23 14:29         ` Kasatkin, Dmitry
  2012-03-23 14:32           ` dm-bufio Mike Snitzer
  0 siblings, 1 reply; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-23 14:29 UTC (permalink / raw)
  To: device-mapper development; +Cc: mpatocka, Zdenek Kabelac

On Fri, Mar 23, 2012 at 4:21 PM, Mike Snitzer <snitzer@redhat.com> wrote:
> On Fri, Mar 23 2012 at 10:12am -0400,
> Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:
>
>> On Fri, Mar 23, 2012 at 1:26 PM, Kasatkin, Dmitry
>> <dmitry.kasatkin@intel.com> wrote:
>> > On Fri, Mar 23, 2012 at 1:10 PM, Zdenek Kabelac <zkabelac@redhat.com> wrote:
>> >> Dne 23.3.2012 12:01, Kasatkin, Dmitry napsal(a):
>> >>> Hello,
>> >>>
>> >>> When using dm-bufio and dm-io in general, how to ensure that all dirty
>> >>> buffers are written to the storage when machine reboots?
>> >>> suspend hooks could be used, but they are not called on reboot, only
>> >>> when suspending/removing the target...
>> >>>
>> >>
>> >> You mean you reboot without running  'sync' command?
>> >>
>> >> And yes - on reboot you should properly unmount devices - so you should
>> >> see removal of target on your shutdown sequence -  I believe Fedora currently
>> >> tries to support switch to some shutdown ramdisk, so all filesystem and
>> >> devices might be properly unmounted and destroyed.
>> >>
>> >
>> > Hello,
>> >
>> > Thanks for response.
>> > I use bufio to store some data on block device.
>> > It is not mounted in anyway. My target just use it to load/store data.
>> > When machine reboots, I want to be sure that bufio written all dirty buffers...
>> >
>> > - Dmitry
>> >
>>
>> At the moment, I have reboot notifier which does the following
>>
>>       dm_bufio_write_dirty_buffers(d->bufio);
>>       sync_blockdev(d->dev->bdev);
>>       blkdev_issue_flush(d->dev->bdev, GFP_KERNEL, NULL);
>>
>> without first line on the next boot I got corrupted/not updated blocks.
>> and I am not sure if I need last 2 lines...
>
> Are you cleanly removing the target from the kernel before reboot
> (e.g. dmsetup remove devname)?
>
> As long as your target's .dtr is making sure to flush all outstanding IO
> (like your reboot notifier does) you should be fine.
>

The target contains rootfs... On reboot, it is remounted read-only.
I cannot remove it...

Sometime ago I had "message" operation "sync", to sync backing devices.
But reboot notifier looks nice... It is automatically called.

> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 14:12     ` dm-bufio Kasatkin, Dmitry
@ 2012-03-23 14:21       ` Mike Snitzer
  2012-03-23 14:29         ` dm-bufio Kasatkin, Dmitry
  2012-03-23 16:22       ` dm-bufio Mikulas Patocka
  1 sibling, 1 reply; 51+ messages in thread
From: Mike Snitzer @ 2012-03-23 14:21 UTC (permalink / raw)
  To: Kasatkin, Dmitry; +Cc: device-mapper development, mpatocka, Zdenek Kabelac

On Fri, Mar 23 2012 at 10:12am -0400,
Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:

> On Fri, Mar 23, 2012 at 1:26 PM, Kasatkin, Dmitry
> <dmitry.kasatkin@intel.com> wrote:
> > On Fri, Mar 23, 2012 at 1:10 PM, Zdenek Kabelac <zkabelac@redhat.com> wrote:
> >> Dne 23.3.2012 12:01, Kasatkin, Dmitry napsal(a):
> >>> Hello,
> >>>
> >>> When using dm-bufio and dm-io in general, how to ensure that all dirty
> >>> buffers are written to the storage when machine reboots?
> >>> suspend hooks could be used, but they are not called on reboot, only
> >>> when suspending/removing the target...
> >>>
> >>
> >> You mean you reboot without running  'sync' command?
> >>
> >> And yes - on reboot you should properly unmount devices - so you should
> >> see removal of target on your shutdown sequence -  I believe Fedora currently
> >> tries to support switch to some shutdown ramdisk, so all filesystem and
> >> devices might be properly unmounted and destroyed.
> >>
> >
> > Hello,
> >
> > Thanks for response.
> > I use bufio to store some data on block device.
> > It is not mounted in anyway. My target just use it to load/store data.
> > When machine reboots, I want to be sure that bufio written all dirty buffers...
> >
> > - Dmitry
> >
> 
> At the moment, I have reboot notifier which does the following
> 
> 	dm_bufio_write_dirty_buffers(d->bufio);
> 	sync_blockdev(d->dev->bdev);
> 	blkdev_issue_flush(d->dev->bdev, GFP_KERNEL, NULL);
> 
> without first line on the next boot I got corrupted/not updated blocks.
> and I am not sure if I need last 2 lines...

Are you cleanly removing the target from the kernel before reboot
(e.g. dmsetup remove devname)?

As long as your target's .dtr is making sure to flush all outstanding IO
(like your reboot notifier does) you should be fine.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 11:26   ` dm-bufio Kasatkin, Dmitry
@ 2012-03-23 14:12     ` Kasatkin, Dmitry
  2012-03-23 14:21       ` dm-bufio Mike Snitzer
  2012-03-23 16:22       ` dm-bufio Mikulas Patocka
  0 siblings, 2 replies; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-23 14:12 UTC (permalink / raw)
  To: Zdenek Kabelac; +Cc: device-mapper development, mpatocka

On Fri, Mar 23, 2012 at 1:26 PM, Kasatkin, Dmitry
<dmitry.kasatkin@intel.com> wrote:
> On Fri, Mar 23, 2012 at 1:10 PM, Zdenek Kabelac <zkabelac@redhat.com> wrote:
>> Dne 23.3.2012 12:01, Kasatkin, Dmitry napsal(a):
>>> Hello,
>>>
>>> When using dm-bufio and dm-io in general, how to ensure that all dirty
>>> buffers are written to the storage when machine reboots?
>>> suspend hooks could be used, but they are not called on reboot, only
>>> when suspending/removing the target...
>>>
>>
>> You mean you reboot without running  'sync' command?
>>
>> And yes - on reboot you should properly unmount devices - so you should
>> see removal of target on your shutdown sequence -  I believe Fedora currently
>> tries to support switch to some shutdown ramdisk, so all filesystem and
>> devices might be properly unmounted and destroyed.
>>
>
> Hello,
>
> Thanks for response.
> I use bufio to store some data on block device.
> It is not mounted in anyway. My target just use it to load/store data.
> When machine reboots, I want to be sure that bufio written all dirty buffers...
>
> - Dmitry
>

At the moment, I have reboot notifier which does the following

	dm_bufio_write_dirty_buffers(d->bufio);
	sync_blockdev(d->dev->bdev);
	blkdev_issue_flush(d->dev->bdev, GFP_KERNEL, NULL);

without first line on the next boot I got corrupted/not updated blocks.
and I am not sure if I need last 2 lines...

- Dmitry

>
>> Zdenek
>>
>>

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 11:10 ` dm-bufio Zdenek Kabelac
@ 2012-03-23 11:26   ` Kasatkin, Dmitry
  2012-03-23 14:12     ` dm-bufio Kasatkin, Dmitry
  0 siblings, 1 reply; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-23 11:26 UTC (permalink / raw)
  To: Zdenek Kabelac; +Cc: device-mapper development, mpatocka

On Fri, Mar 23, 2012 at 1:10 PM, Zdenek Kabelac <zkabelac@redhat.com> wrote:
> Dne 23.3.2012 12:01, Kasatkin, Dmitry napsal(a):
>> Hello,
>>
>> When using dm-bufio and dm-io in general, how to ensure that all dirty
>> buffers are written to the storage when machine reboots?
>> suspend hooks could be used, but they are not called on reboot, only
>> when suspending/removing the target...
>>
>
> You mean you reboot without running  'sync' command?
>
> And yes - on reboot you should properly unmount devices - so you should
> see removal of target on your shutdown sequence -  I believe Fedora currently
> tries to support switch to some shutdown ramdisk, so all filesystem and
> devices might be properly unmounted and destroyed.
>

Hello,

Thanks for response.
I use bufio to store some data on block device.
It is not mounted in any way. My target just uses it to load/store data.
When the machine reboots, I want to be sure that bufio has written all dirty buffers...

- Dmitry


> Zdenek
>
>

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 11:01 dm-bufio Kasatkin, Dmitry
@ 2012-03-23 11:10 ` Zdenek Kabelac
  2012-03-23 11:26   ` dm-bufio Kasatkin, Dmitry
  0 siblings, 1 reply; 51+ messages in thread
From: Zdenek Kabelac @ 2012-03-23 11:10 UTC (permalink / raw)
  To: device-mapper development; +Cc: Kasatkin, Dmitry, mpatocka

Dne 23.3.2012 12:01, Kasatkin, Dmitry napsal(a):
> Hello,
> 
> When using dm-bufio and dm-io in general, how to ensure that all dirty
> buffers are written to the storage when machine reboots?
> suspend hooks could be used, but they are not called on reboot, only
> when suspending/removing the target...
> 

You mean you reboot without running  'sync' command?

And yes - on reboot you should properly unmount devices - so you should
see removal of target on your shutdown sequence -  I believe Fedora currently
tries to support switch to some shutdown ramdisk, so all filesystem and
devices might be properly unmounted and destroyed.

Zdenek

^ permalink raw reply	[flat|nested] 51+ messages in thread

* dm-bufio
@ 2012-03-23 11:01 Kasatkin, Dmitry
  2012-03-23 11:10 ` dm-bufio Zdenek Kabelac
  0 siblings, 1 reply; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-23 11:01 UTC (permalink / raw)
  To: dm-devel; +Cc: mpatocka

Hello,

When using dm-bufio and dm-io in general, how to ensure that all dirty
buffers are written to the storage when machine reboots?
suspend hooks could be used, but they are not called on reboot, only
when suspending/removing the target...


Thanks,
Dmitry

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-17 16:49   ` dm-bufio Mikulas Patocka
@ 2011-10-17 18:01     ` Joe Thornber
  0 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 18:01 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Alasdair G. Kergon

On Mon, Oct 17, 2011 at 12:49:22PM -0400, Mikulas Patocka wrote:
> If you want to do it, just do it, but it makes no sense to me.

Well Alasdair, Mike and myself have all looked at this code at various
times over the last 18 months and come to similar conclusions.  We
find the following logic difficult with all those gotos.  There is
also some redundancy which I only noticed when trying to refactor it
(repeated checks).  There's an example that has a jump from one branch
of an 'if' to another, it's trivial to factor out into a function for
each branch, yet you insist on arguing and refuse to change it.  I'm
perfectly prepared to accept that you're familiar enough with this
code, or just clever enough for it to be transparent to you.  But it's
the sort of thing that's going to get picked up on by other small
brained developers like me.  Choose your battles wisely, and save your
energies for something more interesting.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-14  9:15 ` dm-bufio Joe Thornber
  2011-10-14 12:19   ` dm-bufio Joe Thornber
@ 2011-10-17 16:49   ` Mikulas Patocka
  2011-10-17 18:01     ` dm-bufio Joe Thornber
  1 sibling, 1 reply; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-17 16:49 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Alasdair G. Kergon



On Fri, 14 Oct 2011, Joe Thornber wrote:

> On Thu, Oct 13, 2011 at 11:05:51AM -0400, Mikulas Patocka wrote:
> > Hi
> > 
> > I found a way how to shut up lockdep warnings in dm-bufio, so you can 
> > download new a version which uses nested locks from 
> > http://people.redhat.com/mpatocka/patches/kernel/dm-thinp-bufio/
> 
> Thanks Mikulas, I'd had a couple of people ask me about these warnings.
> 
> > BTW. why did you move dm-bufio to persistent-data directory? What are 
> > other dm-bufio users (such as dm-zeroed or dm-multisnap) going to do? I 
> > thought dm-bufio should be a separate module available to all device 
> > mapper targets, such as dm-io, dm-kcopyd or so?
> 
> agk made it clear he wasn't going to merge bufio when I met with him a
> couple of weeks ago.  So to try and speed things up I put my 'agk' hat
> on and tidied as I thought he would.  The main things I did were
> remove the gratuitous gotos (eg, jumping from one branch of an if to
> another).

My point is that - to get understandable code, each function should do a 
task that can be easily and simply described. It is not necessary that the 
function body is small or that it doesn't contain gotos, what is required 
is that the description of a function is small.

If you replace random parts of a function body with a call to another 
function in order to eliminate a goto, it doesn't improve readability of 
the code at all. Instead of a goto you get a call - both are control 
transfer constructs and a call is not necessarily easier to follow than a 
goto (unless the condition in the first paragraph is true) - as it can be 
seen that both you and me made some bugs on those new 
goto-replaced-with-call functions.

If you want to do it, just do it, but it makes no sense to me.

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-14  9:15 ` dm-bufio Joe Thornber
@ 2011-10-14 12:19   ` Joe Thornber
  2011-10-17 16:49   ` dm-bufio Mikulas Patocka
  1 sibling, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-14 12:19 UTC (permalink / raw)
  To: Mikulas Patocka, Alasdair G. Kergon, dm-devel

On Fri, Oct 14, 2011 at 10:15:38AM +0100, Joe Thornber wrote:
> I'm not sure moving dm-bufio to pdata was the right thing to do.

It's back in drivers/md.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-13 22:31     ` dm-bufio Mike Snitzer
@ 2011-10-14  9:19       ` Joe Thornber
  0 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-14  9:19 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel, Mikulas Patocka, Alasdair G. Kergon

On Thu, Oct 13, 2011 at 06:31:47PM -0400, Mike Snitzer wrote:
> On Thu, Oct 13 2011 at  6:19pm -0400,
> Mikulas Patocka <mpatocka@redhat.com> wrote:
> 
> > > +static int dm_bufio_trylock(struct dm_bufio_client *c)
> > > +{
> > > +	return dm_bufio_trylock(c);
> > > +}
> > > +
> > > +static void dm_bufio_unlock(struct dm_bufio_client *c)
> > > +{
> > > +	dm_bufio_unlock(c);
> > > +}
> > 
> > These two functions are recursive nonsense :) There should be
> > "return mutex_trylock(&c->lock);" and "mutex_unlock(&c->lock);".
> 
> Heheh.  Sorry, M-x replace-string gone wild.
> 
> Here is a fixed version.

Thanks Mike.  If you haven't tested something can you please say, so I
know to run it through the full suite?

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-13 15:05 dm-bufio Mikulas Patocka
  2011-10-13 19:40 ` dm-bufio Mike Snitzer
@ 2011-10-14  9:15 ` Joe Thornber
  2011-10-14 12:19   ` dm-bufio Joe Thornber
  2011-10-17 16:49   ` dm-bufio Mikulas Patocka
  1 sibling, 2 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-14  9:15 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Alasdair G. Kergon

On Thu, Oct 13, 2011 at 11:05:51AM -0400, Mikulas Patocka wrote:
> Hi
> 
> I found a way how to shut up lockdep warnings in dm-bufio, so you can 
> download new a version which uses nested locks from 
> http://people.redhat.com/mpatocka/patches/kernel/dm-thinp-bufio/

Thanks Mikulas, I'd had a couple of people ask me about these warnings.

> BTW. why did you move dm-bufio to persistent-data directory? What are 
> other dm-bufio users (such as dm-zeroed or dm-multisnap) going to do? I 
> thought dm-bufio should be a separate module available to all device 
> mapper targets, such as dm-io, dm-kcopyd or so?

agk made it clear he wasn't going to merge bufio when I met with him a
couple of weeks ago.  So to try and speed things up I put my 'agk' hat
on and tidied as I thought he would.  The main things I did were
remove the gratuitous gotos (eg, jumping from one branch of an if to
another).

I'm not sure moving dm-bufio to pdata was the right thing to do.  My
rationale was there's only one user atm, so put it in there.  I don't
think the header should be in include/linux, this is not going to be
used outside dm (dm-io.h shouldn't be either).

This isn't set in stone, take this up with agk.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-13 22:19   ` dm-bufio Mikulas Patocka
@ 2011-10-13 22:31     ` Mike Snitzer
  2011-10-14  9:19       ` dm-bufio Joe Thornber
  0 siblings, 1 reply; 51+ messages in thread
From: Mike Snitzer @ 2011-10-13 22:31 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Edward Thornber, Alasdair G. Kergon

On Thu, Oct 13 2011 at  6:19pm -0400,
Mikulas Patocka <mpatocka@redhat.com> wrote:

> > +static int dm_bufio_trylock(struct dm_bufio_client *c)
> > +{
> > +	return dm_bufio_trylock(c);
> > +}
> > +
> > +static void dm_bufio_unlock(struct dm_bufio_client *c)
> > +{
> > +	dm_bufio_unlock(c);
> > +}
> 
> These two functions are recursive nonsense :) There should be
> "return mutex_trylock(&c->lock);" and "mutex_unlock(&c->lock);".

Heheh.  Sorry, M-x replace-string gone wild.

Here is a fixed version.

---
 drivers/md/persistent-data/dm-bufio.c |   71 ++++++++++++++++++++++-----------
 1 files changed, 48 insertions(+), 23 deletions(-)

diff --git a/drivers/md/persistent-data/dm-bufio.c b/drivers/md/persistent-data/dm-bufio.c
index 6be4386..e7d1aac 100644
--- a/drivers/md/persistent-data/dm-bufio.c
+++ b/drivers/md/persistent-data/dm-bufio.c
@@ -151,6 +151,23 @@ static inline int dm_bufio_cache_index(struct dm_bufio_client *c)
 #define DM_BUFIO_CACHE(c)	(dm_bufio_caches[dm_bufio_cache_index(c)])
 #define DM_BUFIO_CACHE_NAME(c)	(dm_bufio_cache_names[dm_bufio_cache_index(c)])
 
+#define dm_bufio_in_request()	(!!current->bio_list)
+
+static void dm_bufio_lock(struct dm_bufio_client *c)
+{
+	mutex_lock_nested(&c->lock, dm_bufio_in_request());
+}
+
+static int dm_bufio_trylock(struct dm_bufio_client *c)
+{
+	return mutex_trylock(&c->lock);
+}
+
+static void dm_bufio_unlock(struct dm_bufio_client *c)
+{
+	mutex_unlock(&c->lock);
+}
+
 /*----------------------------------------------------------------*/
 
 /* Default cache size --- available memory divided by the ratio */
@@ -595,14 +612,14 @@ static void __wait_for_free_buffer(struct dm_bufio_client *c)
 
 	add_wait_queue(&c->free_buffer_wait, &wait);
 	set_task_state(current, TASK_UNINTERRUPTIBLE);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	io_schedule();
 
 	set_task_state(current, TASK_RUNNING);
 	remove_wait_queue(&c->free_buffer_wait, &wait);
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 }
 
 /*
@@ -836,9 +853,9 @@ static void *new_read(struct dm_bufio_client *c, sector_t block, enum new_flag n
 	int need_submit;
 	struct dm_buffer *b;
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	b = __bufio_new(c, block, nf, bp, &need_submit);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	if (!b || IS_ERR(b))
 		return b;
@@ -867,19 +884,21 @@ void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
 void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
 		    struct dm_buffer **bp)
 {
+	BUG_ON(dm_bufio_in_request());
 	return new_read(c, block, NF_READ, bp);
 }
 
 void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
 		   struct dm_buffer **bp)
 {
+	BUG_ON(dm_bufio_in_request());
 	return new_read(c, block, NF_FRESH, bp);
 }
 
 void dm_bufio_release(struct dm_buffer *b)
 {
 	struct dm_bufio_client *c = b->c;
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	BUG_ON(test_bit(B_READING, &b->state));
 	BUG_ON(!b->hold_count);
 	b->hold_count--;
@@ -898,26 +917,27 @@ void dm_bufio_release(struct dm_buffer *b)
 			__free_buffer_wake(b);
 		}
 	}
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
 {
 	struct dm_bufio_client *c = b->c;
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 
 	if (!test_and_set_bit(B_DIRTY, &b->state))
 		__relink_lru(b, LIST_DIRTY);
 
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
 {
-	mutex_lock(&c->lock);
+	BUG_ON(dm_bufio_in_request());
+	dm_bufio_lock(c);
 	__write_dirty_buffers_async(c, 0);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 /*
@@ -933,7 +953,7 @@ int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
 	unsigned long buffers_processed = 0;
 	struct dm_buffer *b, *tmp;
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	__write_dirty_buffers_async(c, 0);
 
 again:
@@ -947,10 +967,10 @@ again:
 			if (buffers_processed < c->n_buffers[LIST_DIRTY]) {
 				dropped_lock = 1;
 				b->hold_count++;
-				mutex_unlock(&c->lock);
+				dm_bufio_unlock(c);
 				wait_on_bit(&b->state, B_WRITING,
 					    do_io_schedule, TASK_UNINTERRUPTIBLE);
-				mutex_lock(&c->lock);
+				dm_bufio_lock(c);
 				b->hold_count--;
 			} else
 				wait_on_bit(&b->state, B_WRITING,
@@ -978,7 +998,7 @@ again:
 			goto again;
 	}
 	wake_up(&c->free_buffer_wait);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	a = xchg(&c->async_write_error, 0);
 	f = dm_bufio_issue_flush(c);
@@ -1003,6 +1023,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c)
 		.sector = 0,
 		.count = 0,
 	};
+	BUG_ON(dm_bufio_in_request());
 	return dm_io(&io_req, 1, &io_reg, NULL);
 }
 
@@ -1023,7 +1044,9 @@ void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block)
 	struct dm_bufio_client *c = b->c;
 	struct dm_buffer *new;
 
-	mutex_lock(&c->lock);
+	BUG_ON(dm_bufio_in_request());
+
+	dm_bufio_lock(c);
 
 retry:
 	new = __find(c, new_block);
@@ -1054,7 +1077,7 @@ retry:
 		wait_on_bit(&b->state, B_WRITING,
 			    do_io_schedule, TASK_UNINTERRUPTIBLE);
 	}
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 	dm_bufio_release(b);
 }
 
@@ -1094,10 +1117,12 @@ static void drop_buffers(struct dm_bufio_client *c)
 	struct dm_buffer *b;
 	int i;
 
+	BUG_ON(dm_bufio_in_request());
+
 	/* an optimization ... so that the buffers are not written one-by-one */
 	dm_bufio_write_dirty_buffers_async(c);
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	while ((b = __get_unclaimed_buffer(c)))
 		__free_buffer_wake(b);
 
@@ -1110,7 +1135,7 @@ static void drop_buffers(struct dm_bufio_client *c)
 	for (i = 0; i < LIST_N; i++)
 		BUG_ON(!list_empty(&c->lru[i]));
 
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 /*
@@ -1166,9 +1191,9 @@ static int shrink(struct shrinker *shrinker, struct shrink_control *sc)
 	unsigned long nr_to_scan = sc->nr_to_scan;
 
 	if (sc->gfp_mask & __GFP_IO) {
-		mutex_lock(&c->lock);
+		dm_bufio_lock(c);
 	} else {
-		if (!mutex_trylock(&c->lock))
+		if (!dm_bufio_trylock(c))
 			return !nr_to_scan ? 0 : -1;
 	}
 
@@ -1179,7 +1204,7 @@ static int shrink(struct shrinker *shrinker, struct shrink_control *sc)
 	if (r > INT_MAX)
 		r = INT_MAX;
 
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	return r;
 }
@@ -1356,7 +1381,7 @@ static void cleanup_old_buffers(void)
 
 	mutex_lock(&dm_bufio_clients_lock);
 	list_for_each_entry(c, &dm_bufio_all_clients, client_list) {
-		if (!mutex_trylock(&c->lock))
+		if (!dm_bufio_trylock(c))
 			continue;
 
 		while (!list_empty(&c->lru[LIST_CLEAN])) {
@@ -1367,7 +1392,7 @@ static void cleanup_old_buffers(void)
 				break;
 		}
 
-		mutex_unlock(&c->lock);
+		dm_bufio_unlock(c);
 	}
 	mutex_unlock(&dm_bufio_clients_lock);
 }

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-13 19:40 ` dm-bufio Mike Snitzer
@ 2011-10-13 22:19   ` Mikulas Patocka
  2011-10-13 22:31     ` dm-bufio Mike Snitzer
  0 siblings, 1 reply; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-13 22:19 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel, Edward Thornber, Alasdair G. Kergon

> +static int dm_bufio_trylock(struct dm_bufio_client *c)
> +{
> +	return dm_bufio_trylock(c);
> +}
> +
> +static void dm_bufio_unlock(struct dm_bufio_client *c)
> +{
> +	dm_bufio_unlock(c);
> +}

These two functions are recursive nonsense :) There should be
"return mutex_trylock(&c->lock);" and "mutex_unlock(&c->lock);".

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-13 15:05 dm-bufio Mikulas Patocka
@ 2011-10-13 19:40 ` Mike Snitzer
  2011-10-13 22:19   ` dm-bufio Mikulas Patocka
  2011-10-14  9:15 ` dm-bufio Joe Thornber
  1 sibling, 1 reply; 51+ messages in thread
From: Mike Snitzer @ 2011-10-13 19:40 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Edward Thornber, Alasdair G. Kergon

On Thu, Oct 13 2011 at 11:05am -0400,
Mikulas Patocka <mpatocka@redhat.com> wrote:

> Hi
> 
> I found a way how to shut up lockdep warnings in dm-bufio, so you can 
> download new a version which uses nested locks from 
> http://people.redhat.com/mpatocka/patches/kernel/dm-thinp-bufio/
> 
> BTW. why did you move dm-bufio to persistent-data directory? What are 
> other dm-bufio users (such as dm-zeroed or dm-multisnap) going to do? I 
> thought dm-bufio should be a separate module available to all device 
> mapper targets, such as dm-io, dm-kcopyd or so?

Here is a patch with your nested lock fix that applies to Joe's thin-dev
branch (note that the dm-bufio in thin-dev branch has been cleaned up
some, so this took a bit of care; Mikulas, it would be nice if you reviewed
those changes):

 drivers/md/persistent-data/dm-bufio.c |   71 ++++++++++++++++++++++-----------
 1 files changed, 48 insertions(+), 23 deletions(-)

diff --git a/drivers/md/persistent-data/dm-bufio.c b/drivers/md/persistent-data/dm-bufio.c
index 6be4386..e7d1aac 100644
--- a/drivers/md/persistent-data/dm-bufio.c
+++ b/drivers/md/persistent-data/dm-bufio.c
@@ -151,6 +151,23 @@ static inline int dm_bufio_cache_index(struct dm_bufio_client *c)
 #define DM_BUFIO_CACHE(c)	(dm_bufio_caches[dm_bufio_cache_index(c)])
 #define DM_BUFIO_CACHE_NAME(c)	(dm_bufio_cache_names[dm_bufio_cache_index(c)])
 
+#define dm_bufio_in_request()	(!!current->bio_list)
+
+static void dm_bufio_lock(struct dm_bufio_client *c)
+{
+	mutex_lock_nested(&c->lock, dm_bufio_in_request());
+}
+
+static int dm_bufio_trylock(struct dm_bufio_client *c)
+{
+	return dm_bufio_trylock(c);
+}
+
+static void dm_bufio_unlock(struct dm_bufio_client *c)
+{
+	dm_bufio_unlock(c);
+}
+
 /*----------------------------------------------------------------*/
 
 /* Default cache size --- available memory divided by the ratio */
@@ -595,14 +612,14 @@ static void __wait_for_free_buffer(struct dm_bufio_client *c)
 
 	add_wait_queue(&c->free_buffer_wait, &wait);
 	set_task_state(current, TASK_UNINTERRUPTIBLE);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	io_schedule();
 
 	set_task_state(current, TASK_RUNNING);
 	remove_wait_queue(&c->free_buffer_wait, &wait);
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 }
 
 /*
@@ -836,9 +853,9 @@ static void *new_read(struct dm_bufio_client *c, sector_t block, enum new_flag n
 	int need_submit;
 	struct dm_buffer *b;
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	b = __bufio_new(c, block, nf, bp, &need_submit);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	if (!b || IS_ERR(b))
 		return b;
@@ -867,19 +884,21 @@ void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
 void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
 		    struct dm_buffer **bp)
 {
+	BUG_ON(dm_bufio_in_request());
 	return new_read(c, block, NF_READ, bp);
 }
 
 void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
 		   struct dm_buffer **bp)
 {
+	BUG_ON(dm_bufio_in_request());
 	return new_read(c, block, NF_FRESH, bp);
 }
 
 void dm_bufio_release(struct dm_buffer *b)
 {
 	struct dm_bufio_client *c = b->c;
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	BUG_ON(test_bit(B_READING, &b->state));
 	BUG_ON(!b->hold_count);
 	b->hold_count--;
@@ -898,26 +917,27 @@ void dm_bufio_release(struct dm_buffer *b)
 			__free_buffer_wake(b);
 		}
 	}
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
 {
 	struct dm_bufio_client *c = b->c;
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 
 	if (!test_and_set_bit(B_DIRTY, &b->state))
 		__relink_lru(b, LIST_DIRTY);
 
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
 {
-	mutex_lock(&c->lock);
+	BUG_ON(dm_bufio_in_request());
+	dm_bufio_lock(c);
 	__write_dirty_buffers_async(c, 0);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 /*
@@ -933,7 +953,7 @@ int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
 	unsigned long buffers_processed = 0;
 	struct dm_buffer *b, *tmp;
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	__write_dirty_buffers_async(c, 0);
 
 again:
@@ -947,10 +967,10 @@ again:
 			if (buffers_processed < c->n_buffers[LIST_DIRTY]) {
 				dropped_lock = 1;
 				b->hold_count++;
-				mutex_unlock(&c->lock);
+				dm_bufio_unlock(c);
 				wait_on_bit(&b->state, B_WRITING,
 					    do_io_schedule, TASK_UNINTERRUPTIBLE);
-				mutex_lock(&c->lock);
+				dm_bufio_lock(c);
 				b->hold_count--;
 			} else
 				wait_on_bit(&b->state, B_WRITING,
@@ -978,7 +998,7 @@ again:
 			goto again;
 	}
 	wake_up(&c->free_buffer_wait);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	a = xchg(&c->async_write_error, 0);
 	f = dm_bufio_issue_flush(c);
@@ -1003,6 +1023,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c)
 		.sector = 0,
 		.count = 0,
 	};
+	BUG_ON(dm_bufio_in_request());
 	return dm_io(&io_req, 1, &io_reg, NULL);
 }
 
@@ -1023,7 +1044,9 @@ void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block)
 	struct dm_bufio_client *c = b->c;
 	struct dm_buffer *new;
 
-	mutex_lock(&c->lock);
+	BUG_ON(dm_bufio_in_request());
+
+	dm_bufio_lock(c);
 
 retry:
 	new = __find(c, new_block);
@@ -1054,7 +1077,7 @@ retry:
 		wait_on_bit(&b->state, B_WRITING,
 			    do_io_schedule, TASK_UNINTERRUPTIBLE);
 	}
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 	dm_bufio_release(b);
 }
 
@@ -1094,10 +1117,12 @@ static void drop_buffers(struct dm_bufio_client *c)
 	struct dm_buffer *b;
 	int i;
 
+	BUG_ON(dm_bufio_in_request());
+
 	/* an optimization ... so that the buffers are not written one-by-one */
 	dm_bufio_write_dirty_buffers_async(c);
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	while ((b = __get_unclaimed_buffer(c)))
 		__free_buffer_wake(b);
 
@@ -1110,7 +1135,7 @@ static void drop_buffers(struct dm_bufio_client *c)
 	for (i = 0; i < LIST_N; i++)
 		BUG_ON(!list_empty(&c->lru[i]));
 
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 /*
@@ -1166,9 +1191,9 @@ static int shrink(struct shrinker *shrinker, struct shrink_control *sc)
 	unsigned long nr_to_scan = sc->nr_to_scan;
 
 	if (sc->gfp_mask & __GFP_IO) {
-		mutex_lock(&c->lock);
+		dm_bufio_lock(c);
 	} else {
-		if (!mutex_trylock(&c->lock))
+		if (!dm_bufio_trylock(c))
 			return !nr_to_scan ? 0 : -1;
 	}
 
@@ -1179,7 +1204,7 @@ static int shrink(struct shrinker *shrinker, struct shrink_control *sc)
 	if (r > INT_MAX)
 		r = INT_MAX;
 
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	return r;
 }
@@ -1356,7 +1381,7 @@ static void cleanup_old_buffers(void)
 
 	mutex_lock(&dm_bufio_clients_lock);
 	list_for_each_entry(c, &dm_bufio_all_clients, client_list) {
-		if (!mutex_trylock(&c->lock))
+		if (!dm_bufio_trylock(c))
 			continue;
 
 		while (!list_empty(&c->lru[LIST_CLEAN])) {
@@ -1367,7 +1392,7 @@ static void cleanup_old_buffers(void)
 				break;
 		}
 
-		mutex_unlock(&c->lock);
+		dm_bufio_unlock(c);
 	}
 	mutex_unlock(&dm_bufio_clients_lock);
 }

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* dm-bufio
@ 2011-10-13 15:05 Mikulas Patocka
  2011-10-13 19:40 ` dm-bufio Mike Snitzer
  2011-10-14  9:15 ` dm-bufio Joe Thornber
  0 siblings, 2 replies; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-13 15:05 UTC (permalink / raw)
  To: Edward Thornber; +Cc: dm-devel, Alasdair G. Kergon

Hi

I found a way to silence the lockdep warnings in dm-bufio, so you can 
download a new version which uses nested locks from 
http://people.redhat.com/mpatocka/patches/kernel/dm-thinp-bufio/

BTW, why did you move dm-bufio to the persistent-data directory? What are 
other dm-bufio users (such as dm-zeroed or dm-multisnap) going to do? I 
thought dm-bufio should be a separate module available to all device 
mapper targets, just like dm-io or dm-kcopyd?

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-09-27 17:20 dm-bufio Mikulas Patocka
  2011-09-28  9:41 ` dm-bufio Joe Thornber
@ 2011-09-28  9:51 ` Heinz Mauelshagen
  1 sibling, 0 replies; 51+ messages in thread
From: Heinz Mauelshagen @ 2011-09-28  9:51 UTC (permalink / raw)
  To: device-mapper development; +Cc: Edward Thornber

On Tue, 2011-09-27 at 13:20 -0400, Mikulas Patocka wrote:
> Hi
> 
> I placed updated dm-bufio here: 
> http://people.redhat.com/mpatocka/patches/kernel/dm-thinp-bufio/
> 
> It uses kmem_cache with proper alignment instead of kmalloc (which may 
> return unaligned memory), so it should fix problems that Heinz observed on 
> Sparc.

Thanks Mikulas,

I'll test once Joe has merged it.

Heinz

> 
> Mikulas
> 
> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-09-27 17:20 dm-bufio Mikulas Patocka
@ 2011-09-28  9:41 ` Joe Thornber
  2011-09-28  9:51 ` dm-bufio Heinz Mauelshagen
  1 sibling, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-09-28  9:41 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: Heinz Mauelshagen, dm-devel

On Tue, Sep 27, 2011 at 01:20:49PM -0400, Mikulas Patocka wrote:
> Hi
> 
> I placed updated dm-bufio here: 
> http://people.redhat.com/mpatocka/patches/kernel/dm-thinp-bufio/
> 
> It uses kmem_cache with proper alignment instead of kmalloc (which may 
> return unaligned memory), so it should fix problems that Heinz observed on 
> Sparc.

Thanks Mikulas,  I'll merge straight away.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* dm-bufio
@ 2011-09-27 17:20 Mikulas Patocka
  2011-09-28  9:41 ` dm-bufio Joe Thornber
  2011-09-28  9:51 ` dm-bufio Heinz Mauelshagen
  0 siblings, 2 replies; 51+ messages in thread
From: Mikulas Patocka @ 2011-09-27 17:20 UTC (permalink / raw)
  To: Edward Thornber, Heinz Mauelshagen; +Cc: dm-devel

Hi

I placed updated dm-bufio here: 
http://people.redhat.com/mpatocka/patches/kernel/dm-thinp-bufio/

It uses kmem_cache with proper alignment instead of kmalloc (which may 
return unaligned memory), so it should fix problems that Heinz observed on 
Sparc.

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

end of thread, other threads:[~2012-03-27 10:20 UTC | newest]

Thread overview: 51+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
2011-10-17 10:08 ` Joe Thornber
2011-10-17 12:41   ` dm-bufio Mike Snitzer
2011-10-17 14:04   ` [PATCH] dm-bufio Mikulas Patocka
2011-10-17 14:15     ` Joe Thornber
2011-10-17 10:29 ` Joe Thornber
2011-10-17 10:39 ` Joe Thornber
2011-10-17 10:43 ` Joe Thornber
2011-10-17 10:54 ` Joe Thornber
2011-10-17 13:47   ` Mikulas Patocka
2011-10-17 13:57     ` Joe Thornber
2011-10-17 10:57 ` Joe Thornber
2011-10-17 11:11 ` Joe Thornber
2011-10-17 14:05   ` Mikulas Patocka
2011-10-17 14:09     ` Joe Thornber
2011-10-17 16:22       ` Mikulas Patocka
2011-10-17 16:41         ` Joe Thornber
2011-10-17 18:30         ` Joe Thornber
2011-10-17 11:14 ` Joe Thornber
2011-10-17 13:43   ` Mikulas Patocka
2011-10-17 11:29 ` Joe Thornber
2011-10-17 16:24   ` Mikulas Patocka
2011-10-17 16:43     ` Joe Thornber
  -- strict thread matches above, loose matches on Subject: below --
2012-03-23 11:01 dm-bufio Kasatkin, Dmitry
2012-03-23 11:10 ` dm-bufio Zdenek Kabelac
2012-03-23 11:26   ` dm-bufio Kasatkin, Dmitry
2012-03-23 14:12     ` dm-bufio Kasatkin, Dmitry
2012-03-23 14:21       ` dm-bufio Mike Snitzer
2012-03-23 14:29         ` dm-bufio Kasatkin, Dmitry
2012-03-23 14:32           ` dm-bufio Mike Snitzer
2012-03-23 15:17             ` dm-bufio Kasatkin, Dmitry
2012-03-23 21:32               ` dm-bufio Mike Snitzer
2012-03-24  0:04                 ` dm-bufio Kasatkin, Dmitry
2012-03-23 16:22       ` dm-bufio Mikulas Patocka
2012-03-23 23:58         ` dm-bufio Kasatkin, Dmitry
2012-03-24  1:07           ` dm-bufio Mikulas Patocka
2012-03-24 18:51             ` dm-bufio Will Drewry
2012-03-27 10:20               ` dm-bufio Kasatkin, Dmitry
2012-03-27  9:56             ` dm-bufio Kasatkin, Dmitry
2011-10-13 15:05 dm-bufio Mikulas Patocka
2011-10-13 19:40 ` dm-bufio Mike Snitzer
2011-10-13 22:19   ` dm-bufio Mikulas Patocka
2011-10-13 22:31     ` dm-bufio Mike Snitzer
2011-10-14  9:19       ` dm-bufio Joe Thornber
2011-10-14  9:15 ` dm-bufio Joe Thornber
2011-10-14 12:19   ` dm-bufio Joe Thornber
2011-10-17 16:49   ` dm-bufio Mikulas Patocka
2011-10-17 18:01     ` dm-bufio Joe Thornber
2011-09-27 17:20 dm-bufio Mikulas Patocka
2011-09-28  9:41 ` dm-bufio Joe Thornber
2011-09-28  9:51 ` dm-bufio Heinz Mauelshagen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.