All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] dm-bufio
@ 2011-10-14 19:14 Mikulas Patocka
  2011-10-17 10:08 ` Joe Thornber
                   ` (8 more replies)
  0 siblings, 9 replies; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-14 19:14 UTC (permalink / raw)
  To: Alasdair G. Kergon; +Cc: dm-devel, Edward Thornber, Mike Snitzer

Hi

This is a patch for dm-bufio.

Changes:
* fixed a bug in i/o submission, introduced by someone who changed the 
code
* simplified some constructs
* more likely/unlikely hints
* dm-bufio.h moved from drivers/md to include/linux
* put cond_resched() into loops (it was there originally and then someone 
deleted it)

Mikulas

---
 drivers/md/Kconfig       |    2 
 drivers/md/dm-bufio.c    |   95 ++++++++++++++++++++++++----------------
 drivers/md/dm-bufio.h    |  110 -----------------------------------------------
 include/linux/dm-bufio.h |  110 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 169 insertions(+), 148 deletions(-)

Index: linux-3.1-rc3-fast/drivers/md/Kconfig
===================================================================
--- linux-3.1-rc3-fast.orig/drivers/md/Kconfig	2011-10-14 20:56:45.000000000 +0200
+++ linux-3.1-rc3-fast/drivers/md/Kconfig	2011-10-14 20:56:46.000000000 +0200
@@ -209,7 +209,7 @@ config DM_DEBUG
 	  If unsure, say N.
 
 config DM_BUFIO
-       tristate
+       tristate "Buffered IO"
        depends on BLK_DEV_DM && EXPERIMENTAL
        ---help---
 	 This interface allows you to do buffered I/O on a device and acts
Index: linux-3.1-rc3-fast/drivers/md/dm-bufio.c
===================================================================
--- linux-3.1-rc3-fast.orig/drivers/md/dm-bufio.c	2011-10-14 20:56:45.000000000 +0200
+++ linux-3.1-rc3-fast/drivers/md/dm-bufio.c	2011-10-14 20:57:10.000000000 +0200
@@ -167,6 +167,12 @@ static void dm_bufio_unlock(struct dm_bu
 	mutex_unlock(&c->lock);
 }
 
+#ifdef CONFIG_PREEMPT
+#define dm_bufio_cond_resched()		do { } while (0)
+#else
+#define dm_bufio_cond_resched()		cond_resched()
+#endif
+
 /*----------------------------------------------------------------*/
 
 /* Default cache size --- available memory divided by the ratio */
@@ -470,17 +476,18 @@ static void use_inline_bio(struct dm_buf
 	ptr = b->data;
 	len = b->c->block_size;
 
-	if (len >= PAGE_SIZE)
+	if (likely(len >= PAGE_SIZE))
 		BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1));
 	else
 		BUG_ON((unsigned long)ptr & (len - 1));
 
 	do {
-		if (!bio_add_page(&b->bio, virt_to_page(ptr),
-				  len < PAGE_SIZE ? len : PAGE_SIZE,
-				  virt_to_phys(ptr) & (PAGE_SIZE - 1))) {
+		if (unlikely(!bio_add_page(&b->bio, virt_to_page(ptr),
+					len < PAGE_SIZE ? len : PAGE_SIZE,
+					virt_to_phys(ptr) & (PAGE_SIZE - 1)))) {
 			BUG_ON(b->c->block_size <= PAGE_SIZE);
 			use_dmio(b, rw, block, end_io);
+			return;
 		}
 
 		len -= PAGE_SIZE;
@@ -493,8 +500,10 @@ static void use_inline_bio(struct dm_buf
 static void submit_io(struct dm_buffer *b, int rw, sector_t block,
 		      bio_end_io_t *end_io)
 {
-	if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
-	    b->data_mode != DATA_MODE_VMALLOC)
+	if (rw == WRITE && b->c->write_callback)
+		b->c->write_callback(b);
+	if (likely(b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE) &&
+	    likely(b->data_mode != DATA_MODE_VMALLOC))
 		use_inline_bio(b, rw, block, end_io);
 	else
 		use_dmio(b, rw, block, end_io);
@@ -514,7 +523,7 @@ static void write_endio(struct bio *bio,
 {
 	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
 	b->write_error = error;
-	if (error) {
+	if (unlikely(error)) {
 		struct dm_bufio_client *c = b->c;
 		(void)cmpxchg(&c->async_write_error, 0, error);
 	}
@@ -550,8 +559,6 @@ static void __write_dirty_buffer(struct 
 	clear_bit(B_DIRTY, &b->state);
 	wait_on_bit_lock(&b->state, B_WRITING,
 			 do_io_schedule, TASK_UNINTERRUPTIBLE);
-	if (b->c->write_callback)
-		b->c->write_callback(b);
 	submit_io(b, WRITE, b->block, write_endio);
 }
 
@@ -572,7 +579,7 @@ static void __make_buffer_clean(struct d
 
 /*
  * Find some buffer that is not held by anybody, clean it, unlink it and
- * return it.  If "wait" is zero, try less hard and don't block.
+ * return it.
  */
 static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
 {
@@ -585,6 +592,7 @@ static struct dm_buffer *__get_unclaimed
 			__unlink_buffer(b);
 			return b;
 		}
+		dm_bufio_cond_resched();
 	}
 
 	list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
@@ -594,6 +602,7 @@ static struct dm_buffer *__get_unclaimed
 			__unlink_buffer(b);
 			return b;
 		}
+		dm_bufio_cond_resched();
 	}
 	return NULL;
 }
@@ -636,18 +645,21 @@ static struct dm_buffer *__alloc_buffer_
 		 * This is useful for debugging. When we set cache size to 1,
 		 * no new buffers are allocated at all.
 		 */
-		if (dm_bufio_cache_size_latch != 1) {
+		if (likely(dm_bufio_cache_size_latch != 1)) {
 			/*
-			 * dm-bufio is resistant to allocation failures (it just keeps
-			 * one buffer reserved in cases all the allocations fail).
+			 * dm-bufio is resistant to allocation failures (it
+			 * just keeps one buffer reserved in cases all the
+			 * allocations fail).
 			 * So set flags to not try too hard:
 			 *	GFP_NOIO: don't recurse into the I/O layer
-			 *	__GFP_NORETRY: don't retry and rather return failure
+			 *	__GFP_NORETRY: don't retry and rather return
+			 *			failure
 			 *	__GFP_NOMEMALLOC: don't use emergency reserves
-			 *	__GFP_NOWARN: don't print a warning in case of failure
+			 *	__GFP_NOWARN: don't print a warning in case of
+			 *			failure
 			 */
 			b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
-			if (b)
+			if (likely(b != NULL))
 				return b;
 		}
 
@@ -670,7 +682,7 @@ static struct dm_buffer *__alloc_buffer_
 static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c)
 {
 	struct dm_buffer *b = __alloc_buffer_wait_no_callback(c);
-	if (b && c->alloc_callback)
+	if (c->alloc_callback)
 		c->alloc_callback(b);
 	return b;
 }
@@ -682,7 +694,7 @@ static void __free_buffer_wake(struct dm
 {
 	struct dm_bufio_client *c = b->c;
 
-	if (c->need_reserved_buffers) {
+	if (unlikely(c->need_reserved_buffers != 0)) {
 		list_add(&b->lru_list, &c->reserved_buffers);
 		c->need_reserved_buffers--;
 	} else
@@ -704,6 +716,7 @@ static void __write_dirty_buffers_async(
 		if (no_wait && test_bit(B_WRITING, &b->state))
 			return;
 		__write_dirty_buffer(b);
+		dm_bufio_cond_resched();
 	}
 }
 
@@ -716,7 +729,7 @@ static void __get_memory_limit(struct dm
 {
 	unsigned long buffers;
 
-	if (dm_bufio_cache_size != dm_bufio_cache_size_latch) {
+	if (unlikely(dm_bufio_cache_size != dm_bufio_cache_size_latch)) {
 		mutex_lock(&dm_bufio_clients_lock);
 		__cache_size_refresh();
 		mutex_unlock(&dm_bufio_clients_lock);
@@ -724,7 +737,7 @@ static void __get_memory_limit(struct dm
 
 	buffers = dm_bufio_cache_size_per_client >>
 		  (c->sectors_per_block_bits + SECTOR_SHIFT);
-	if (buffers < DM_BUFIO_MIN_BUFFERS)
+	if (unlikely(buffers < DM_BUFIO_MIN_BUFFERS))
 		buffers = DM_BUFIO_MIN_BUFFERS;
 	*limit_buffers = buffers;
 	*threshold_buffers = buffers * DM_BUFIO_WRITEBACK_PERCENT / 100;
@@ -747,6 +760,7 @@ static void __check_watermark(struct dm_
 		if (!b)
 			return;
 		__free_buffer_wake(b);
+		dm_bufio_cond_resched();
 	}
 	if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
 		__write_dirty_buffers_async(c, 1);
@@ -758,8 +772,9 @@ static struct dm_buffer *__find(struct d
 	struct dm_buffer *b;
 	struct hlist_node *hn;
 	hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], hash_list) {
-		if (b->block == block)
+		if (likely(b->block == block))
 			return b;
+		dm_bufio_cond_resched();
 	}
 
 	return NULL;
@@ -775,12 +790,13 @@ enum new_flag {
 	NF_GET = 2
 };
 
-static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
-				     struct dm_buffer **bp, int *need_submit)
+static void read_endio(struct bio *bio, int error);
+
+static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
+				     enum new_flag nf, struct dm_buffer **bp)
 {
 	struct dm_buffer *b, *new_b = NULL;
 
-	*need_submit = 0;
 	b = __find(c, block);
 	if (b) {
 		b->hold_count++;
@@ -821,7 +837,9 @@ static struct dm_buffer *__bufio_new(str
 	}
 
 	b->state = 1 << B_READING;
-	*need_submit = 1;
+
+	submit_io(b, READ, b->block, read_endio);
+
 	return b;
 }
 
@@ -849,22 +867,18 @@ static void read_endio(struct bio *bio, 
 static void *new_read(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
 		      struct dm_buffer **bp)
 {
-	int need_submit;
 	struct dm_buffer *b;
 
 	dm_bufio_lock(c);
-	b = __bufio_new(c, block, nf, bp, &need_submit);
+	b = __bufio_new(c, block, nf, bp);
 	dm_bufio_unlock(c);
 
-	if (!b || IS_ERR(b))
+	if (unlikely(!b) || unlikely(IS_ERR(b)))
 		return b;
 	else {
-		if (need_submit)
-			submit_io(b, READ, b->block, read_endio);
-
 		wait_on_bit(&b->state, B_READING,
 			    do_io_schedule, TASK_UNINTERRUPTIBLE);
-		if (b->read_error != 0) {
+		if (unlikely(b->read_error != 0)) {
 			int error = b->read_error;
 			dm_bufio_release(b);
 			return ERR_PTR(error);
@@ -904,7 +918,7 @@ void dm_bufio_release(struct dm_buffer *
 	BUG_ON(test_bit(B_READING, &b->state));
 	BUG_ON(!b->hold_count);
 	b->hold_count--;
-	if (!b->hold_count) {
+	if (likely(!b->hold_count)) {
 		wake_up(&c->free_buffer_wait);
 
 		/*
@@ -912,7 +926,7 @@ void dm_bufio_release(struct dm_buffer *
 		 * to be written, free the buffer. There is no point in caching
 		 * invalid buffer.
 		 */
-		if ((b->read_error || b->write_error) &&
+		if (unlikely((b->read_error | b->write_error) != 0) &&
 		    !test_bit(B_WRITING, &b->state) &&
 		    !test_bit(B_DIRTY, &b->state)) {
 			__unlink_buffer(b);
@@ -999,15 +1013,19 @@ again:
 		 * someone is doing some writes simultaneously with us --- in
 		 * this case, stop dropping the lock.
 		 */
+		dm_bufio_cond_resched();
 		if (dropped_lock)
 			goto again;
 	}
 	wake_up(&c->free_buffer_wait);
 	dm_bufio_unlock(c);
 
-	a = xchg(&c->async_write_error, 0);
+	if (likely(!c->async_write_error))
+		a = 0;
+	else
+		a = xchg(&c->async_write_error, 0);
 	f = dm_bufio_issue_flush(c);
-	if (a)
+	if (unlikely(a != 0))
 		return a;
 	return f;
 }
@@ -1071,7 +1089,7 @@ retry:
 	BUG_ON(!b->hold_count);
 	BUG_ON(test_bit(B_READING, &b->state));
 	__write_dirty_buffer(b);
-	if (b->hold_count == 1) {
+	if (likely(b->hold_count == 1)) {
 		wait_on_bit(&b->state, B_WRITING,
 			    do_io_schedule, TASK_UNINTERRUPTIBLE);
 		set_bit(B_DIRTY, &b->state);
@@ -1193,6 +1211,7 @@ static void __scan(struct dm_bufio_clien
 				if (!--nr_to_scan)
 					return;
 			}
+			dm_bufio_cond_resched();
 		}
 	}
 }
@@ -1406,9 +1425,11 @@ static void cleanup_old_buffers(void)
 				       struct dm_buffer, lru_list);
 			if (__cleanup_old_buffer(b, 0, max_age))
 				break;
+			dm_bufio_cond_resched();
 		}
 
 		dm_bufio_unlock(c);
+		dm_bufio_cond_resched();
 	}
 	mutex_unlock(&dm_bufio_clients_lock);
 }
Index: linux-3.1-rc3-fast/drivers/md/dm-bufio.h
===================================================================
--- linux-3.1-rc3-fast.orig/drivers/md/dm-bufio.h	2011-10-14 20:56:45.000000000 +0200
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,110 +0,0 @@
-/*
- * Copyright (C) 2009-2011 2011 Red Hat, Inc.
- *
- * This file is released under the GPL.
- */
-
-#ifndef DM_BUFIO_H
-#define DM_BUFIO_H
-
-#include <linux/blkdev.h>
-#include <linux/types.h>
-
-/*----------------------------------------------------------------*/
-
-struct dm_bufio_client;
-struct dm_buffer;
-
-/*
- * Create a buffered IO cache on a given device
- */
-struct dm_bufio_client *
-dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
-		       unsigned reserved_buffers, unsigned aux_size,
-		       void (*alloc_callback)(struct dm_buffer *),
-		       void (*write_callback)(struct dm_buffer *));
-
-/*
- * Release a buffered IO cache.
- */
-void dm_bufio_client_destroy(struct dm_bufio_client *c);
-
-/*
- * WARNING: to avoid deadlocks, these conditions are observed:
- *
- * - At most one thread can hold at most "reserved_buffers" simultaneously.
- * - Each other threads can hold at most one buffer.
- * - Threads which call only dm_bufio_get can hold unlimited number of
- *   buffers.
- */
-
-/*
- * Read a given block from disk. Returns pointer to data.  Returns a
- * pointer to dm_buffer that can be used to release the buffer or to make
- * it dirty.
- */
-void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
-		    struct dm_buffer **bp);
-
-/*
- * Like dm_bufio_read, but return buffer from cache, don't read
- * it. If the buffer is not in the cache, return NULL.
- */
-void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
-		   struct dm_buffer **bp);
-
-/*
- * Like dm_bufio_read, but don't read anything from the disk.  It is
- * expected that the caller initializes the buffer and marks it dirty.
- */
-void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
-		   struct dm_buffer **bp);
-
-/*
- * Release a reference obtained with dm_bufio_{read,get,new}. The data
- * pointer and dm_buffer pointer is no longer valid after this call.
- */
-void dm_bufio_release(struct dm_buffer *b);
-
-/*
- * Mark a buffer dirty. It should be called after the buffer is modified.
- *
- * In case of memory pressure, the buffer may be written after
- * dm_bufio_mark_buffer_dirty, but before dm_bufio_write_dirty_buffers.  So
- * dm_bufio_write_dirty_buffers guarantees that the buffer is on-disk but
- * the actual writing may occur earlier.
- */
-void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
-
-/*
- * Initiate writing of dirty buffers, without waiting for completion.
- */
-void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
-
-/*
- * Write all dirty buffers. Guarantees that all dirty buffers created prior
- * to this call are on disk when this call exits.
- */
-int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c);
-
-/*
- * Send an empty write barrier to the device to flush hardware disk cache.
- */
-int dm_bufio_issue_flush(struct dm_bufio_client *c);
-
-/*
- * Like dm_bufio_release but also move the buffer to the new
- * block. dm_bufio_write_dirty_buffers is needed to commit the new block.
- */
-void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
-
-unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
-sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
-sector_t dm_bufio_get_block_number(struct dm_buffer *b);
-void *dm_bufio_get_block_data(struct dm_buffer *b);
-void *dm_bufio_get_aux_data(struct dm_buffer *b);
-struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b);
-
-/*----------------------------------------------------------------*/
-
-#endif
Index: linux-3.1-rc3-fast/include/linux/dm-bufio.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-3.1-rc3-fast/include/linux/dm-bufio.h	2011-10-14 20:56:46.000000000 +0200
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2009-2011 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_BUFIO_H
+#define DM_BUFIO_H
+
+#include <linux/blkdev.h>
+#include <linux/types.h>
+
+/*----------------------------------------------------------------*/
+
+struct dm_bufio_client;
+struct dm_buffer;
+
+/*
+ * Create a buffered IO cache on a given device
+ */
+struct dm_bufio_client *
+dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
+		       unsigned reserved_buffers, unsigned aux_size,
+		       void (*alloc_callback)(struct dm_buffer *),
+		       void (*write_callback)(struct dm_buffer *));
+
+/*
+ * Release a buffered IO cache.
+ */
+void dm_bufio_client_destroy(struct dm_bufio_client *c);
+
+/*
+ * WARNING: to avoid deadlocks, these conditions are observed:
+ *
+ * - At most one thread can hold at most "reserved_buffers" simultaneously.
+ * - Each other threads can hold at most one buffer.
+ * - Threads which call only dm_bufio_get can hold unlimited number of
+ *   buffers.
+ */
+
+/*
+ * Read a given block from disk. Returns pointer to data.  Returns a
+ * pointer to dm_buffer that can be used to release the buffer or to make
+ * it dirty.
+ */
+void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
+		    struct dm_buffer **bp);
+
+/*
+ * Like dm_bufio_read, but return buffer from cache, don't read
+ * it. If the buffer is not in the cache, return NULL.
+ */
+void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
+		   struct dm_buffer **bp);
+
+/*
+ * Like dm_bufio_read, but don't read anything from the disk.  It is
+ * expected that the caller initializes the buffer and marks it dirty.
+ */
+void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
+		   struct dm_buffer **bp);
+
+/*
+ * Release a reference obtained with dm_bufio_{read,get,new}. The data
+ * pointer and dm_buffer pointer is no longer valid after this call.
+ */
+void dm_bufio_release(struct dm_buffer *b);
+
+/*
+ * Mark a buffer dirty. It should be called after the buffer is modified.
+ *
+ * In case of memory pressure, the buffer may be written after
+ * dm_bufio_mark_buffer_dirty, but before dm_bufio_write_dirty_buffers.  So
+ * dm_bufio_write_dirty_buffers guarantees that the buffer is on-disk but
+ * the actual writing may occur earlier.
+ */
+void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
+
+/*
+ * Initiate writing of dirty buffers, without waiting for completion.
+ */
+void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
+
+/*
+ * Write all dirty buffers. Guarantees that all dirty buffers created prior
+ * to this call are on disk when this call exits.
+ */
+int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c);
+
+/*
+ * Send an empty write barrier to the device to flush hardware disk cache.
+ */
+int dm_bufio_issue_flush(struct dm_bufio_client *c);
+
+/*
+ * Like dm_bufio_release but also move the buffer to the new
+ * block. dm_bufio_write_dirty_buffers is needed to commit the new block.
+ */
+void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
+
+unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
+sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
+sector_t dm_bufio_get_block_number(struct dm_buffer *b);
+void *dm_bufio_get_block_data(struct dm_buffer *b);
+void *dm_bufio_get_aux_data(struct dm_buffer *b);
+struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b);
+
+/*----------------------------------------------------------------*/
+
+#endif

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
@ 2011-10-17 10:08 ` Joe Thornber
  2011-10-17 12:41   ` dm-bufio Mike Snitzer
  2011-10-17 14:04   ` [PATCH] dm-bufio Mikulas Patocka
  2011-10-17 10:29 ` Joe Thornber
                   ` (7 subsequent siblings)
  8 siblings, 2 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 10:08 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> Hi
> 
> This is a patch for dm-bufio.
> 
> Changes:
> * fixed a bug in i/o submission, introduced by someone who changed the 
> code

Could you point me at the specific part of this patch that does this
please?

> * simplified some constructs
> * more likely/unlikely hints

They clutter the code, and have been used without discrimination.  What
is the point of using it on a slow path?

> * dm-bufio.h moved from drivers/md to include/linux

Who outside dm do you expect to use this?

> * put cond_resched() to loops (it was there originally and then someone 
> deleted it)

If you're going to use cond_resched() at least do so a little more
intelligently than putting it in _every_ loop.  For instance you call it on
every iteration of a sweep through the hash table.  The call to
cond_resched will take more time than the loop body.  At least make a
change so it's only done every n'th iteration.


Can I also point out that I asked you to make a lot of these changes
over a year ago.  You've only got yourself to blame if 'someone' does
it for you.

- Someone




> 
> Mikulas
> 
> ---
>  drivers/md/Kconfig       |    2 
>  drivers/md/dm-bufio.c    |   95 ++++++++++++++++++++++++----------------
>  drivers/md/dm-bufio.h    |  110 -----------------------------------------------
>  include/linux/dm-bufio.h |  110 +++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 169 insertions(+), 148 deletions(-)
> 
> Index: linux-3.1-rc3-fast/drivers/md/Kconfig
> ===================================================================
> --- linux-3.1-rc3-fast.orig/drivers/md/Kconfig	2011-10-14 20:56:45.000000000 +0200
> +++ linux-3.1-rc3-fast/drivers/md/Kconfig	2011-10-14 20:56:46.000000000 +0200
> @@ -209,7 +209,7 @@ config DM_DEBUG
>  	  If unsure, say N.
>  
>  config DM_BUFIO
> -       tristate
> +       tristate "Buffered IO"
>         depends on BLK_DEV_DM && EXPERIMENTAL
>         ---help---
>  	 This interface allows you to do buffered I/O on a device and acts
> Index: linux-3.1-rc3-fast/drivers/md/dm-bufio.c
> ===================================================================
> --- linux-3.1-rc3-fast.orig/drivers/md/dm-bufio.c	2011-10-14 20:56:45.000000000 +0200
> +++ linux-3.1-rc3-fast/drivers/md/dm-bufio.c	2011-10-14 20:57:10.000000000 +0200
> @@ -167,6 +167,12 @@ static void dm_bufio_unlock(struct dm_bu
>  	mutex_unlock(&c->lock);
>  }
>  
> +#ifdef CONFIG_PREEMPT
> +#define dm_bufio_cond_resched()		do { } while (0)
> +#else
> +#define dm_bufio_cond_resched()		cond_resched()
> +#endif
> +
>  /*----------------------------------------------------------------*/
>  
>  /* Default cache size --- available memory divided by the ratio */
> @@ -470,17 +476,18 @@ static void use_inline_bio(struct dm_buf
>  	ptr = b->data;
>  	len = b->c->block_size;
>  
> -	if (len >= PAGE_SIZE)
> +	if (likely(len >= PAGE_SIZE))
>  		BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1));
>  	else
>  		BUG_ON((unsigned long)ptr & (len - 1));
>  
>  	do {
> -		if (!bio_add_page(&b->bio, virt_to_page(ptr),
> -				  len < PAGE_SIZE ? len : PAGE_SIZE,
> -				  virt_to_phys(ptr) & (PAGE_SIZE - 1))) {
> +		if (unlikely(!bio_add_page(&b->bio, virt_to_page(ptr),
> +					len < PAGE_SIZE ? len : PAGE_SIZE,
> +					virt_to_phys(ptr) & (PAGE_SIZE - 1)))) {
>  			BUG_ON(b->c->block_size <= PAGE_SIZE);
>  			use_dmio(b, rw, block, end_io);
> +			return;
>  		}
>  
>  		len -= PAGE_SIZE;
> @@ -493,8 +500,10 @@ static void use_inline_bio(struct dm_buf
>  static void submit_io(struct dm_buffer *b, int rw, sector_t block,
>  		      bio_end_io_t *end_io)
>  {
> -	if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
> -	    b->data_mode != DATA_MODE_VMALLOC)
> +	if (rw == WRITE && b->c->write_callback)
> +		b->c->write_callback(b);
> +	if (likely(b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE) &&
> +	    likely(b->data_mode != DATA_MODE_VMALLOC))
>  		use_inline_bio(b, rw, block, end_io);
>  	else
>  		use_dmio(b, rw, block, end_io);
> @@ -514,7 +523,7 @@ static void write_endio(struct bio *bio,
>  {
>  	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
>  	b->write_error = error;
> -	if (error) {
> +	if (unlikely(error)) {
>  		struct dm_bufio_client *c = b->c;
>  		(void)cmpxchg(&c->async_write_error, 0, error);
>  	}
> @@ -550,8 +559,6 @@ static void __write_dirty_buffer(struct 
>  	clear_bit(B_DIRTY, &b->state);
>  	wait_on_bit_lock(&b->state, B_WRITING,
>  			 do_io_schedule, TASK_UNINTERRUPTIBLE);
> -	if (b->c->write_callback)
> -		b->c->write_callback(b);
>  	submit_io(b, WRITE, b->block, write_endio);
>  }
>  
> @@ -572,7 +579,7 @@ static void __make_buffer_clean(struct d
>  
>  /*
>   * Find some buffer that is not held by anybody, clean it, unlink it and
> - * return it.  If "wait" is zero, try less hard and don't block.
> + * return it.
>   */
>  static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
>  {
> @@ -585,6 +592,7 @@ static struct dm_buffer *__get_unclaimed
>  			__unlink_buffer(b);
>  			return b;
>  		}
> +		dm_bufio_cond_resched();
>  	}
>  
>  	list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
> @@ -594,6 +602,7 @@ static struct dm_buffer *__get_unclaimed
>  			__unlink_buffer(b);
>  			return b;
>  		}
> +		dm_bufio_cond_resched();
>  	}
>  	return NULL;
>  }
> @@ -636,18 +645,21 @@ static struct dm_buffer *__alloc_buffer_
>  		 * This is useful for debugging. When we set cache size to 1,
>  		 * no new buffers are allocated at all.
>  		 */
> -		if (dm_bufio_cache_size_latch != 1) {
> +		if (likely(dm_bufio_cache_size_latch != 1)) {
>  			/*
> -			 * dm-bufio is resistant to allocation failures (it just keeps
> -			 * one buffer reserved in cases all the allocations fail).
> +			 * dm-bufio is resistant to allocation failures (it
> +			 * just keeps one buffer reserved in cases all the
> +			 * allocations fail).
>  			 * So set flags to not try too hard:
>  			 *	GFP_NOIO: don't recurse into the I/O layer
> -			 *	__GFP_NORETRY: don't retry and rather return failure
> +			 *	__GFP_NORETRY: don't retry and rather return
> +			 *			failure
>  			 *	__GFP_NOMEMALLOC: don't use emergency reserves
> -			 *	__GFP_NOWARN: don't print a warning in case of failure
> +			 *	__GFP_NOWARN: don't print a warning in case of
> +			 *			failure
>  			 */
>  			b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
> -			if (b)
> +			if (likely(b != NULL))
>  				return b;
>  		}
>  
> @@ -670,7 +682,7 @@ static struct dm_buffer *__alloc_buffer_
>  static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c)
>  {
>  	struct dm_buffer *b = __alloc_buffer_wait_no_callback(c);
> -	if (b && c->alloc_callback)
> +	if (c->alloc_callback)
>  		c->alloc_callback(b);
>  	return b;
>  }
> @@ -682,7 +694,7 @@ static void __free_buffer_wake(struct dm
>  {
>  	struct dm_bufio_client *c = b->c;
>  
> -	if (c->need_reserved_buffers) {
> +	if (unlikely(c->need_reserved_buffers != 0)) {
>  		list_add(&b->lru_list, &c->reserved_buffers);
>  		c->need_reserved_buffers--;
>  	} else
> @@ -704,6 +716,7 @@ static void __write_dirty_buffers_async(
>  		if (no_wait && test_bit(B_WRITING, &b->state))
>  			return;
>  		__write_dirty_buffer(b);
> +		dm_bufio_cond_resched();
>  	}
>  }
>  
> @@ -716,7 +729,7 @@ static void __get_memory_limit(struct dm
>  {
>  	unsigned long buffers;
>  
> -	if (dm_bufio_cache_size != dm_bufio_cache_size_latch) {
> +	if (unlikely(dm_bufio_cache_size != dm_bufio_cache_size_latch)) {
>  		mutex_lock(&dm_bufio_clients_lock);
>  		__cache_size_refresh();
>  		mutex_unlock(&dm_bufio_clients_lock);
> @@ -724,7 +737,7 @@ static void __get_memory_limit(struct dm
>  
>  	buffers = dm_bufio_cache_size_per_client >>
>  		  (c->sectors_per_block_bits + SECTOR_SHIFT);
> -	if (buffers < DM_BUFIO_MIN_BUFFERS)
> +	if (unlikely(buffers < DM_BUFIO_MIN_BUFFERS))
>  		buffers = DM_BUFIO_MIN_BUFFERS;
>  	*limit_buffers = buffers;
>  	*threshold_buffers = buffers * DM_BUFIO_WRITEBACK_PERCENT / 100;
> @@ -747,6 +760,7 @@ static void __check_watermark(struct dm_
>  		if (!b)
>  			return;
>  		__free_buffer_wake(b);
> +		dm_bufio_cond_resched();
>  	}
>  	if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
>  		__write_dirty_buffers_async(c, 1);
> @@ -758,8 +772,9 @@ static struct dm_buffer *__find(struct d
>  	struct dm_buffer *b;
>  	struct hlist_node *hn;
>  	hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], hash_list) {
> -		if (b->block == block)
> +		if (likely(b->block == block))
>  			return b;
> +		dm_bufio_cond_resched();
>  	}
>  
>  	return NULL;
> @@ -775,12 +790,13 @@ enum new_flag {
>  	NF_GET = 2
>  };
>  
> -static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
> -				     struct dm_buffer **bp, int *need_submit)
> +static void read_endio(struct bio *bio, int error);
> +
> +static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
> +				     enum new_flag nf, struct dm_buffer **bp)
>  {
>  	struct dm_buffer *b, *new_b = NULL;
>  
> -	*need_submit = 0;
>  	b = __find(c, block);
>  	if (b) {
>  		b->hold_count++;
> @@ -821,7 +837,9 @@ static struct dm_buffer *__bufio_new(str
>  	}
>  
>  	b->state = 1 << B_READING;
> -	*need_submit = 1;
> +
> +	submit_io(b, READ, b->block, read_endio);
> +
>  	return b;
>  }
>  
> @@ -849,22 +867,18 @@ static void read_endio(struct bio *bio, 
>  static void *new_read(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
>  		      struct dm_buffer **bp)
>  {
> -	int need_submit;
>  	struct dm_buffer *b;
>  
>  	dm_bufio_lock(c);
> -	b = __bufio_new(c, block, nf, bp, &need_submit);
> +	b = __bufio_new(c, block, nf, bp);
>  	dm_bufio_unlock(c);
>  
> -	if (!b || IS_ERR(b))
> +	if (unlikely(!b) || unlikely(IS_ERR(b)))
>  		return b;
>  	else {
> -		if (need_submit)
> -			submit_io(b, READ, b->block, read_endio);
> -
>  		wait_on_bit(&b->state, B_READING,
>  			    do_io_schedule, TASK_UNINTERRUPTIBLE);
> -		if (b->read_error != 0) {
> +		if (unlikely(b->read_error != 0)) {
>  			int error = b->read_error;
>  			dm_bufio_release(b);
>  			return ERR_PTR(error);
> @@ -904,7 +918,7 @@ void dm_bufio_release(struct dm_buffer *
>  	BUG_ON(test_bit(B_READING, &b->state));
>  	BUG_ON(!b->hold_count);
>  	b->hold_count--;
> -	if (!b->hold_count) {
> +	if (likely(!b->hold_count)) {
>  		wake_up(&c->free_buffer_wait);
>  
>  		/*
> @@ -912,7 +926,7 @@ void dm_bufio_release(struct dm_buffer *
>  		 * to be written, free the buffer. There is no point in caching
>  		 * invalid buffer.
>  		 */
> -		if ((b->read_error || b->write_error) &&
> +		if (unlikely((b->read_error | b->write_error) != 0) &&
>  		    !test_bit(B_WRITING, &b->state) &&
>  		    !test_bit(B_DIRTY, &b->state)) {
>  			__unlink_buffer(b);
> @@ -999,15 +1013,19 @@ again:
>  		 * someone is doing some writes simultaneously with us --- in
>  		 * this case, stop dropping the lock.
>  		 */
> +		dm_bufio_cond_resched();
>  		if (dropped_lock)
>  			goto again;
>  	}
>  	wake_up(&c->free_buffer_wait);
>  	dm_bufio_unlock(c);
>  
> -	a = xchg(&c->async_write_error, 0);
> +	if (likely(!c->async_write_error))
> +		a = 0;
> +	else
> +		a = xchg(&c->async_write_error, 0);
>  	f = dm_bufio_issue_flush(c);
> -	if (a)
> +	if (unlikely(a != 0))
>  		return a;
>  	return f;
>  }
> @@ -1071,7 +1089,7 @@ retry:
>  	BUG_ON(!b->hold_count);
>  	BUG_ON(test_bit(B_READING, &b->state));
>  	__write_dirty_buffer(b);
> -	if (b->hold_count == 1) {
> +	if (likely(b->hold_count == 1)) {
>  		wait_on_bit(&b->state, B_WRITING,
>  			    do_io_schedule, TASK_UNINTERRUPTIBLE);
>  		set_bit(B_DIRTY, &b->state);
> @@ -1193,6 +1211,7 @@ static void __scan(struct dm_bufio_clien
>  				if (!--nr_to_scan)
>  					return;
>  			}
> +			dm_bufio_cond_resched();
>  		}
>  	}
>  }
> @@ -1406,9 +1425,11 @@ static void cleanup_old_buffers(void)
>  				       struct dm_buffer, lru_list);
>  			if (__cleanup_old_buffer(b, 0, max_age))
>  				break;
> +			dm_bufio_cond_resched();
>  		}
>  
>  		dm_bufio_unlock(c);
> +		dm_bufio_cond_resched();
>  	}
>  	mutex_unlock(&dm_bufio_clients_lock);
>  }
> Index: linux-3.1-rc3-fast/drivers/md/dm-bufio.h
> ===================================================================
> --- linux-3.1-rc3-fast.orig/drivers/md/dm-bufio.h	2011-10-14 20:56:45.000000000 +0200
> +++ /dev/null	1970-01-01 00:00:00.000000000 +0000
> @@ -1,110 +0,0 @@
> -/*
> - * Copyright (C) 2009-2011 2011 Red Hat, Inc.
> - *
> - * This file is released under the GPL.
> - */
> -
> -#ifndef DM_BUFIO_H
> -#define DM_BUFIO_H
> -
> -#include <linux/blkdev.h>
> -#include <linux/types.h>
> -
> -/*----------------------------------------------------------------*/
> -
> -struct dm_bufio_client;
> -struct dm_buffer;
> -
> -/*
> - * Create a buffered IO cache on a given device
> - */
> -struct dm_bufio_client *
> -dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
> -		       unsigned reserved_buffers, unsigned aux_size,
> -		       void (*alloc_callback)(struct dm_buffer *),
> -		       void (*write_callback)(struct dm_buffer *));
> -
> -/*
> - * Release a buffered IO cache.
> - */
> -void dm_bufio_client_destroy(struct dm_bufio_client *c);
> -
> -/*
> - * WARNING: to avoid deadlocks, these conditions are observed:
> - *
> - * - At most one thread can hold at most "reserved_buffers" simultaneously.
> - * - Any other thread can hold at most one buffer.
> - * - Threads which call only dm_bufio_get can hold an unlimited number of
> - *   buffers.
> - */
> -
> -/*
> - * Read a given block from disk. Returns pointer to data.  Returns a
> - * pointer to dm_buffer that can be used to release the buffer or to make
> - * it dirty.
> - */
> -void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
> -		    struct dm_buffer **bp);
> -
> -/*
> - * Like dm_bufio_read, but return buffer from cache, don't read
> - * it. If the buffer is not in the cache, return NULL.
> - */
> -void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
> -		   struct dm_buffer **bp);
> -
> -/*
> - * Like dm_bufio_read, but don't read anything from the disk.  It is
> - * expected that the caller initializes the buffer and marks it dirty.
> - */
> -void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
> -		   struct dm_buffer **bp);
> -
> -/*
> - * Release a reference obtained with dm_bufio_{read,get,new}. The data
> - * pointer and dm_buffer pointer are no longer valid after this call.
> - */
> -void dm_bufio_release(struct dm_buffer *b);
> -
> -/*
> - * Mark a buffer dirty. It should be called after the buffer is modified.
> - *
> - * In case of memory pressure, the buffer may be written after
> - * dm_bufio_mark_buffer_dirty, but before dm_bufio_write_dirty_buffers.  So
> - * dm_bufio_write_dirty_buffers guarantees that the buffer is on-disk but
> - * the actual writing may occur earlier.
> - */
> -void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
> -
> -/*
> - * Initiate writing of dirty buffers, without waiting for completion.
> - */
> -void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
> -
> -/*
> - * Write all dirty buffers. Guarantees that all dirty buffers created prior
> - * to this call are on disk when this call exits.
> - */
> -int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c);
> -
> -/*
> - * Send an empty write barrier to the device to flush hardware disk cache.
> - */
> -int dm_bufio_issue_flush(struct dm_bufio_client *c);
> -
> -/*
> - * Like dm_bufio_release but also move the buffer to the new
> - * block. dm_bufio_write_dirty_buffers is needed to commit the new block.
> - */
> -void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
> -
> -unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
> -sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
> -sector_t dm_bufio_get_block_number(struct dm_buffer *b);
> -void *dm_bufio_get_block_data(struct dm_buffer *b);
> -void *dm_bufio_get_aux_data(struct dm_buffer *b);
> -struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b);
> -
> -/*----------------------------------------------------------------*/
> -
> -#endif
> Index: linux-3.1-rc3-fast/include/linux/dm-bufio.h
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-3.1-rc3-fast/include/linux/dm-bufio.h	2011-10-14 20:56:46.000000000 +0200
> @@ -0,0 +1,110 @@
> +/*
> + * Copyright (C) 2009-2011 Red Hat, Inc.
> + *
> + * This file is released under the GPL.
> + */
> +
> +#ifndef DM_BUFIO_H
> +#define DM_BUFIO_H
> +
> +#include <linux/blkdev.h>
> +#include <linux/types.h>
> +
> +/*----------------------------------------------------------------*/
> +
> +struct dm_bufio_client;
> +struct dm_buffer;
> +
> +/*
> + * Create a buffered IO cache on a given device
> + */
> +struct dm_bufio_client *
> +dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
> +		       unsigned reserved_buffers, unsigned aux_size,
> +		       void (*alloc_callback)(struct dm_buffer *),
> +		       void (*write_callback)(struct dm_buffer *));
> +
> +/*
> + * Release a buffered IO cache.
> + */
> +void dm_bufio_client_destroy(struct dm_bufio_client *c);
> +
> +/*
> + * WARNING: to avoid deadlocks, these conditions are observed:
> + *
> + * - At most one thread can hold at most "reserved_buffers" simultaneously.
> + * - Any other thread can hold at most one buffer.
> + * - Threads which call only dm_bufio_get can hold an unlimited number of
> + *   buffers.
> + */
> +
> +/*
> + * Read a given block from disk. Returns pointer to data.  Returns a
> + * pointer to dm_buffer that can be used to release the buffer or to make
> + * it dirty.
> + */
> +void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
> +		    struct dm_buffer **bp);
> +
> +/*
> + * Like dm_bufio_read, but return buffer from cache, don't read
> + * it. If the buffer is not in the cache, return NULL.
> + */
> +void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
> +		   struct dm_buffer **bp);
> +
> +/*
> + * Like dm_bufio_read, but don't read anything from the disk.  It is
> + * expected that the caller initializes the buffer and marks it dirty.
> + */
> +void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
> +		   struct dm_buffer **bp);
> +
> +/*
> + * Release a reference obtained with dm_bufio_{read,get,new}. The data
> + * pointer and dm_buffer pointer are no longer valid after this call.
> + */
> +void dm_bufio_release(struct dm_buffer *b);
> +
> +/*
> + * Mark a buffer dirty. It should be called after the buffer is modified.
> + *
> + * In case of memory pressure, the buffer may be written after
> + * dm_bufio_mark_buffer_dirty, but before dm_bufio_write_dirty_buffers.  So
> + * dm_bufio_write_dirty_buffers guarantees that the buffer is on-disk but
> + * the actual writing may occur earlier.
> + */
> +void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
> +
> +/*
> + * Initiate writing of dirty buffers, without waiting for completion.
> + */
> +void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
> +
> +/*
> + * Write all dirty buffers. Guarantees that all dirty buffers created prior
> + * to this call are on disk when this call exits.
> + */
> +int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c);
> +
> +/*
> + * Send an empty write barrier to the device to flush hardware disk cache.
> + */
> +int dm_bufio_issue_flush(struct dm_bufio_client *c);
> +
> +/*
> + * Like dm_bufio_release but also move the buffer to the new
> + * block. dm_bufio_write_dirty_buffers is needed to commit the new block.
> + */
> +void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
> +
> +unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
> +sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
> +sector_t dm_bufio_get_block_number(struct dm_buffer *b);
> +void *dm_bufio_get_block_data(struct dm_buffer *b);
> +void *dm_bufio_get_aux_data(struct dm_buffer *b);
> +struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b);
> +
> +/*----------------------------------------------------------------*/
> +
> +#endif

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
  2011-10-17 10:08 ` Joe Thornber
@ 2011-10-17 10:29 ` Joe Thornber
  2011-10-17 10:39 ` Joe Thornber
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 10:29 UTC (permalink / raw)
  To: device-mapper development; +Cc: Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> --- linux-3.1-rc3-fast.orig/drivers/md/Kconfig	2011-10-14 20:56:45.000000000 +0200
> +++ linux-3.1-rc3-fast/drivers/md/Kconfig	2011-10-14 20:56:46.000000000 +0200
> @@ -209,7 +209,7 @@ config DM_DEBUG
>  	  If unsure, say N.
>  
>  config DM_BUFIO
> -       tristate
> +       tristate "Buffered IO"
>         depends on BLK_DEV_DM && EXPERIMENTAL
>         ---help---
>  	 This interface allows you to do buffered I/O on a device and acts

This makes bufio appear in 'menuconfig' and friends.  Why do you want this user visible?

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
  2011-10-17 10:08 ` Joe Thornber
  2011-10-17 10:29 ` Joe Thornber
@ 2011-10-17 10:39 ` Joe Thornber
  2011-10-17 10:43 ` Joe Thornber
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 10:39 UTC (permalink / raw)
  To: device-mapper development; +Cc: Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
>  			use_dmio(b, rw, block, end_io);
> +			return;

ACK

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
                   ` (2 preceding siblings ...)
  2011-10-17 10:39 ` Joe Thornber
@ 2011-10-17 10:43 ` Joe Thornber
  2011-10-17 10:54 ` Joe Thornber
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 10:43 UTC (permalink / raw)
  To: device-mapper development; +Cc: Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> @@ -572,7 +579,7 @@ static void __make_buffer_clean(struct d
>  
>  /*
>   * Find some buffer that is not held by anybody, clean it, unlink it and
> - * return it.  If "wait" is zero, try less hard and don't block.
> + * return it.
>   */

ACK

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
                   ` (3 preceding siblings ...)
  2011-10-17 10:43 ` Joe Thornber
@ 2011-10-17 10:54 ` Joe Thornber
  2011-10-17 13:47   ` Mikulas Patocka
  2011-10-17 10:57 ` Joe Thornber
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 10:54 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> -static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
> -				     struct dm_buffer **bp, int *need_submit)
> +static void read_endio(struct bio *bio, int error);
> +
> +static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
> +				     enum new_flag nf, struct dm_buffer **bp)
>  {
>  	struct dm_buffer *b, *new_b = NULL;
>  
> -	*need_submit = 0;
>  	b = __find(c, block);
>  	if (b) {
>  		b->hold_count++;
> @@ -821,7 +837,9 @@ static struct dm_buffer *__bufio_new(str
>  	}
>  
>  	b->state = 1 << B_READING;
> -	*need_submit = 1;
> +
> +	submit_io(b, READ, b->block, read_endio);
> +
>  	return b;
>  }
>  
> @@ -849,22 +867,18 @@ static void read_endio(struct bio *bio, 
>  static void *new_read(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
>  		      struct dm_buffer **bp)
>  {
> -	int need_submit;
>  	struct dm_buffer *b;
>  
>  	dm_bufio_lock(c);
> -	b = __bufio_new(c, block, nf, bp, &need_submit);
> +	b = __bufio_new(c, block, nf, bp);
>  	dm_bufio_unlock(c);
>  
> 	if (!b || IS_ERR(b))
>  		return b;
>  	else {
> -		if (need_submit)
> -			submit_io(b, READ, b->block, read_endio);
> -
>  		wait_on_bit(&b->state, B_READING,
>  			    do_io_schedule, TASK_UNINTERRUPTIBLE);


This change means submit_io(), which can block, will be called with
the client lock held.  I don't see this as an improvement.  NACK.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
                   ` (4 preceding siblings ...)
  2011-10-17 10:54 ` Joe Thornber
@ 2011-10-17 10:57 ` Joe Thornber
  2011-10-17 11:11 ` Joe Thornber
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 10:57 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> @@ -670,7 +682,7 @@ static struct dm_buffer *__alloc_buffer_
>  static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c)
>  {
>  	struct dm_buffer *b = __alloc_buffer_wait_no_callback(c);
> -	if (b && c->alloc_callback)
> +	if (c->alloc_callback)
>  		c->alloc_callback(b);
>  	return b;
>  }

ACK.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
                   ` (5 preceding siblings ...)
  2011-10-17 10:57 ` Joe Thornber
@ 2011-10-17 11:11 ` Joe Thornber
  2011-10-17 14:05   ` Mikulas Patocka
  2011-10-17 11:14 ` Joe Thornber
  2011-10-17 11:29 ` Joe Thornber
  8 siblings, 1 reply; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 11:11 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
@@ -493,8 +500,10 @@ static void use_inline_bio(struct dm_buf
 static void submit_io(struct dm_buffer *b, int rw, sector_t block,
                      bio_end_io_t *end_io)
 {
-       if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
-           b->data_mode != DATA_MODE_VMALLOC)
+       if (rw == WRITE && b->c->write_callback)
+               b->c->write_callback(b);
        if (likely(b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE) &&
            likely(b->data_mode != DATA_MODE_VMALLOC))
                use_inline_bio(b, rw, block, end_io);
        else
                use_dmio(b, rw, block, end_io);
@@ -550,8 +559,6 @@ static void __write_dirty_buffer(struct
        clear_bit(B_DIRTY, &b->state);
        wait_on_bit_lock(&b->state, B_WRITING,
                         do_io_schedule, TASK_UNINTERRUPTIBLE);
-       if (b->c->write_callback)
-               b->c->write_callback(b);
        submit_io(b, WRITE, b->block, write_endio);
 }


This doesn't seem an improvement.  Except ... it changes the behaviour
of dm_bufio_release_move().  So was there a preexisting bug in
dm_bufio_release_move() that you're trying to fix with this patch?

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
                   ` (6 preceding siblings ...)
  2011-10-17 11:11 ` Joe Thornber
@ 2011-10-17 11:14 ` Joe Thornber
  2011-10-17 13:43   ` Mikulas Patocka
  2011-10-17 11:29 ` Joe Thornber
  8 siblings, 1 reply; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 11:14 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
@@ -999,15 +1013,19 @@ again:
                 * someone is doing some writes simultaneously with us --- in
                 * this case, stop dropping the lock.
                 */
                if (dropped_lock)
                        goto again;
        }
        wake_up(&c->free_buffer_wait);
        dm_bufio_unlock(c);

-       a = xchg(&c->async_write_error, 0);
+       if (likely(!c->async_write_error))
+               a = 0;
+       else
+               a = xchg(&c->async_write_error, 0);
        f = dm_bufio_issue_flush(c);


I didn't change this.  Is it also fixing a preexisting issue?

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
                   ` (7 preceding siblings ...)
  2011-10-17 11:14 ` Joe Thornber
@ 2011-10-17 11:29 ` Joe Thornber
  2011-10-17 16:24   ` Mikulas Patocka
  8 siblings, 1 reply; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 11:29 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> Hi
> 
> This is a patch for dm-bufio.

I've merged all I'm going to at this point and pushed to thin-dev.

If you put together a patch for the cond_resched stuff I'll take that;
providing you don't call it in a tight loop like here:

> @@ -758,8 +772,9 @@ static struct dm_buffer *__find(struct d
>  	struct dm_buffer *b;
>  	struct hlist_node *hn;
>  	hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], hash_list) {
> -		if (b->block == block)
> +		if (likely(b->block == block))
>  			return b;
> +		dm_bufio_cond_resched();
>  	}


I'm not really interested in the likely/unlikely annotations.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-17 10:08 ` Joe Thornber
@ 2011-10-17 12:41   ` Mike Snitzer
  2011-10-17 14:04   ` [PATCH] dm-bufio Mikulas Patocka
  1 sibling, 0 replies; 51+ messages in thread
From: Mike Snitzer @ 2011-10-17 12:41 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Mikulas Patocka, Alasdair G. Kergon

On Mon, Oct 17 2011 at  6:08am -0400,
Joe Thornber <thornber@redhat.com> wrote:

> On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> > Hi
> > 
> > This is a patch for dm-bufio.
> > 
> > Changes:
...
> > * dm-bufio.h moved from drivers/md to include/linux
> 
> Who outside dm do you expect to use this?

External DM target modules may have an interest (not great justification
but...).

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 11:14 ` Joe Thornber
@ 2011-10-17 13:43   ` Mikulas Patocka
  0 siblings, 0 replies; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-17 13:43 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon



On Mon, 17 Oct 2011, Joe Thornber wrote:

> On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> @@ -999,15 +1013,19 @@ again:
>                  * someone is doing some writes simultaneously with us --- in
>                  * this case, stop dropping the lock.
>                  */
>                 if (dropped_lock)
>                         goto again;
>         }
>         wake_up(&c->free_buffer_wait);
>         dm_bufio_unlock(c);
> 
> -       a = xchg(&c->async_write_error, 0);
> +       if (likely(!c->async_write_error))
> +               a = 0;
> +       else
> +               a = xchg(&c->async_write_error, 0);
>         f = dm_bufio_issue_flush(c);
> 
> 
> I didn't change this.  Is it also fixing a preexisting issue?

It doesn't fix anything. It just bypasses xchg for the most common case.

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 10:54 ` Joe Thornber
@ 2011-10-17 13:47   ` Mikulas Patocka
  2011-10-17 13:57     ` Joe Thornber
  0 siblings, 1 reply; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-17 13:47 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon



On Mon, 17 Oct 2011, Joe Thornber wrote:

> On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> > -static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
> > -				     struct dm_buffer **bp, int *need_submit)
> > +static void read_endio(struct bio *bio, int error);
> > +
> > +static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
> > +				     enum new_flag nf, struct dm_buffer **bp)
> >  {
> >  	struct dm_buffer *b, *new_b = NULL;
> >  
> > -	*need_submit = 0;
> >  	b = __find(c, block);
> >  	if (b) {
> >  		b->hold_count++;
> > @@ -821,7 +837,9 @@ static struct dm_buffer *__bufio_new(str
> >  	}
> >  
> >  	b->state = 1 << B_READING;
> > -	*need_submit = 1;
> > +
> > +	submit_io(b, READ, b->block, read_endio);
> > +
> >  	return b;
> >  }
> >  
> > @@ -849,22 +867,18 @@ static void read_endio(struct bio *bio, 
> >  static void *new_read(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
> >  		      struct dm_buffer **bp)
> >  {
> > -	int need_submit;
> >  	struct dm_buffer *b;
> >  
> >  	dm_bufio_lock(c);
> > -	b = __bufio_new(c, block, nf, bp, &need_submit);
> > +	b = __bufio_new(c, block, nf, bp);
> >  	dm_bufio_unlock(c);
> >  
> > 	if (!b || IS_ERR(b))
> >  		return b;
> >  	else {
> > -		if (need_submit)
> > -			submit_io(b, READ, b->block, read_endio);
> > -
> >  		wait_on_bit(&b->state, B_READING,
> >  			    do_io_schedule, TASK_UNINTERRUPTIBLE);
> 
> 
> This change means submit_io(), which can block, will be called with
> the client lock held.  I don't see this as an improvement.  NACK.

dm-bufio is designed with the possibility that submit_io is called inside 
the lock. There are other places that do it too. But it's true that it 
wasn't called in the lock before.

I think the best thing would be to delete those functions and use the code 
that was there before.

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 13:47   ` Mikulas Patocka
@ 2011-10-17 13:57     ` Joe Thornber
  0 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 13:57 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Mon, Oct 17, 2011 at 09:47:05AM -0400, Mikulas Patocka wrote:
> I think the best thing would be to delete those functions and use the code
> that was there before.

I know you do.  However, Alasdair made it clear that he was not
willing to push the code as it was and we don't have time to argue.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 10:08 ` Joe Thornber
  2011-10-17 12:41   ` dm-bufio Mike Snitzer
@ 2011-10-17 14:04   ` Mikulas Patocka
  2011-10-17 14:15     ` Joe Thornber
  1 sibling, 1 reply; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-17 14:04 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon



On Mon, 17 Oct 2011, Joe Thornber wrote:

> On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> > Hi
> > 
> > This is a patch for dm-bufio.
> > 
> > Changes:
> > * fixed a bug in i/o submission, introduced by someone who changed the 
> > code
> 
> Could you point me at the specific part of this patch that does this
> please?

this:
                        BUG_ON(b->c->block_size <= PAGE_SIZE);
                        use_dmio(b, rw, block, end_io);
+                       return;


> > * simplified some constructs
> > * more likely/unlikely hints
> 
> They clutter the code, and have been used without discrimination.  What
> is the point of using it on a slow path?

I think I added them only to the possible hot paths.

> > * dm-bufio.h moved from drivers/md to include/linux
> 
> Who outside dm do you expect to use this?

I don't know, Alasdair said that he wants it there (like 
include/linux/dm-io.h for example). BTW dm-bufio has nothing to do with 
device mapper actually, so it can be used by any code.

> > * put cond_resched() to loops (it was there originally and then someone 
> > deleted it)
> 
> If you're going to use cond_resched() at least do so a little more
> intelligently than putting it in _every_ loop.  For instance you call it on
> every iteration of a sweep through the hash table.  The call to
> cond_resched will take more time than the loop body.  At least make a
> change so it's only done every n'th iteration.

I think it would be better to use
#ifndef CONFIG_PREEMPT
if (need_resched()) cond_resched();
#endif

need_resched() is inlined and translates to a single condition.

I don't know why Linux doesn't provide a macro for it; this would be 
useful far beyond dm code.

Mikulas

> Can I also point out that I asked you to make a lot of these changes
> over a year ago.  You've only got yourself to blame if 'someone' does
> it for you.
> 
> - Someone

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 11:11 ` Joe Thornber
@ 2011-10-17 14:05   ` Mikulas Patocka
  2011-10-17 14:09     ` Joe Thornber
  0 siblings, 1 reply; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-17 14:05 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon



On Mon, 17 Oct 2011, Joe Thornber wrote:

> On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> @@ -493,8 +500,10 @@ static void use_inline_bio(struct dm_buf
>  static void submit_io(struct dm_buffer *b, int rw, sector_t block,
>                       bio_end_io_t *end_io)
>  {
> -       if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
> -           b->data_mode != DATA_MODE_VMALLOC)
> +       if (rw == WRITE && b->c->write_callback)
> +               b->c->write_callback(b);
>         if (likely(b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE) &&
>             likely(b->data_mode != DATA_MODE_VMALLOC))
>                 use_inline_bio(b, rw, block, end_io);
>         else
>                 use_dmio(b, rw, block, end_io);
> @@ -550,8 +559,6 @@ static void __write_dirty_buffer(struct
>         clear_bit(B_DIRTY, &b->state);
>         wait_on_bit_lock(&b->state, B_WRITING,
>                          do_io_schedule, TASK_UNINTERRUPTIBLE);
> -       if (b->c->write_callback)
> -               b->c->write_callback(b);
>         submit_io(b, WRITE, b->block, write_endio);
>  }
> 
> 
> This doesn't seem an improvement.  Except ... it changes the behaviour
> of dm_bufio_release_move().  So was there a preexisting bug in
> dm_bufio_release_move() that you're trying to fix with this patch?

The actual reason was to do this callback in dm_bufio_release_move() too 
--- just for consistency. (the user of dm_bufio_release_move() doesn't use 
write_callback anyway).

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 14:05   ` Mikulas Patocka
@ 2011-10-17 14:09     ` Joe Thornber
  2011-10-17 16:22       ` Mikulas Patocka
  0 siblings, 1 reply; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 14:09 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Mon, Oct 17, 2011 at 10:05:26AM -0400, Mikulas Patocka wrote:
> 
> 
> On Mon, 17 Oct 2011, Joe Thornber wrote:
> 
> > On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> > @@ -493,8 +500,10 @@ static void use_inline_bio(struct dm_buf
> >  static void submit_io(struct dm_buffer *b, int rw, sector_t block,
> >                       bio_end_io_t *end_io)
> >  {
> > -       if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
> > -           b->data_mode != DATA_MODE_VMALLOC)
> > +       if (rw == WRITE && b->c->write_callback)
> > +               b->c->write_callback(b);
> >         if (likely(b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE) &&
> >             likely(b->data_mode != DATA_MODE_VMALLOC))
> >                 use_inline_bio(b, rw, block, end_io);
> >         else
> >                 use_dmio(b, rw, block, end_io);
> > @@ -550,8 +559,6 @@ static void __write_dirty_buffer(struct
> >         clear_bit(B_DIRTY, &b->state);
> >         wait_on_bit_lock(&b->state, B_WRITING,
> >                          do_io_schedule, TASK_UNINTERRUPTIBLE);
> > -       if (b->c->write_callback)
> > -               b->c->write_callback(b);
> >         submit_io(b, WRITE, b->block, write_endio);
> >  }
> > 
> > 
> > This doesn't seem an improvement.  Except ... it changes the behaviour
> > of dm_bufio_release_move().  So was there a preexisting bug in
> > dm_bufio_release_move() that you're trying to fix with this patch?
> 
> The actual reason was to do this callback in dm_bufio_release_move() too 
> --- just for consistency. (the user of dm_bufio_release_move() doesn't use 
> write_callback anyway).

thinp uses dm_bufio_release_move() and write_callback.  So yes, this
is a bug fix.  I thought so and merged.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 14:04   ` [PATCH] dm-bufio Mikulas Patocka
@ 2011-10-17 14:15     ` Joe Thornber
  0 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 14:15 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Mon, Oct 17, 2011 at 10:04:16AM -0400, Mikulas Patocka wrote:
> > If you're going to use cond_resched() at least do so a little more
> > intelligently than putting it in _every_ loop.  For instance you call it on
> > every iteration of a sweep through the hash table.  The call to
> > cond_resched will take more time than the loop body.  At least make a
> > change so it's only done every n'th iteration.
> 
> I think it would be better to use
> #ifndef CONFIG_PREEMPT
> if (need_resched()) cond_resched();
> #endif
> 
> need_resched() is inlined and translates to a single condition.

Yep, that would be fine.

> I don't know why Linux doesn't provide a macro for it, this would be 
> useful far beyond dm code.

Agreed, I was very surprised that cond_resched() expands out to a
function call rather than a test + fn call.

Get a patch together and I'll merge.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 14:09     ` Joe Thornber
@ 2011-10-17 16:22       ` Mikulas Patocka
  2011-10-17 16:41         ` Joe Thornber
  2011-10-17 18:30         ` Joe Thornber
  0 siblings, 2 replies; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-17 16:22 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon



On Mon, 17 Oct 2011, Joe Thornber wrote:

> On Mon, Oct 17, 2011 at 10:05:26AM -0400, Mikulas Patocka wrote:
> > 
> > 
> > On Mon, 17 Oct 2011, Joe Thornber wrote:
> > 
> > > On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> > > @@ -493,8 +500,10 @@ static void use_inline_bio(struct dm_buf
> > >  static void submit_io(struct dm_buffer *b, int rw, sector_t block,
> > >                       bio_end_io_t *end_io)
> > >  {
> > > -       if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
> > > -           b->data_mode != DATA_MODE_VMALLOC)
> > > +       if (rw == WRITE && b->c->write_callback)
> > > +               b->c->write_callback(b);
> > >         if (likely(b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE) &&
> > >             likely(b->data_mode != DATA_MODE_VMALLOC))
> > >                 use_inline_bio(b, rw, block, end_io);
> > >         else
> > >                 use_dmio(b, rw, block, end_io);
> > > @@ -550,8 +559,6 @@ static void __write_dirty_buffer(struct
> > >         clear_bit(B_DIRTY, &b->state);
> > >         wait_on_bit_lock(&b->state, B_WRITING,
> > >                          do_io_schedule, TASK_UNINTERRUPTIBLE);
> > > -       if (b->c->write_callback)
> > > -               b->c->write_callback(b);
> > >         submit_io(b, WRITE, b->block, write_endio);
> > >  }
> > > 
> > > 
> > > This doesn't seem an improvement.  Except ... it changes the behaviour
> > > of dm_bufio_release_move().  So was there a preexisting bug in
> > > dm_bufio_release_move() that you're trying to fix with this patch?
> > 
> > The actual reason was to do this callback in dm_bufio_release_move() too 
> > --- just for consistency. (the user of dm_bufio_release_move() doesn't use 
> > write_callback anyway).
> 
> thinp uses dm_bufio_release_move() and write_callback.  So yes, this
> is a bug fix.  I thought so and merged.

BTW. it still uses the old block number in "prepare_for_write" callback. 
Do you use this block number somewhere? Should we link the buffer to the 
new block before the call and then link it back?

Mikulas

> - Joe
> 

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 11:29 ` Joe Thornber
@ 2011-10-17 16:24   ` Mikulas Patocka
  2011-10-17 16:43     ` Joe Thornber
  0 siblings, 1 reply; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-17 16:24 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Mon, 17 Oct 2011, Joe Thornber wrote:

> On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> > Hi
> > 
> > This is a patch for dm-bufio.
> 
> I've merged all I'm going to at this point and pushed to thin-dev.

I think Alasdair currently holds the master version of this code. Or is it 
you? If we have three versions of the same code and don't know which one is 
the master, then there is a problem.

Mikulas

> If you put together a patch for the cond_resched stuff I'll take that;
> providing you don't call it in a tight loop like here:
> 
> > @@ -758,8 +772,9 @@ static struct dm_buffer *__find(struct d
> >  	struct dm_buffer *b;
> >  	struct hlist_node *hn;
> >  	hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], hash_list) {
> > -		if (b->block == block)
> > +		if (likely(b->block == block))
> >  			return b;
> > +		dm_bufio_cond_resched();
> >  	}
> 
> 
> I'm not really interested in the likely/unlikely annotations.
> 
> - Joe
> 

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 16:22       ` Mikulas Patocka
@ 2011-10-17 16:41         ` Joe Thornber
  2011-10-17 18:30         ` Joe Thornber
  1 sibling, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 16:41 UTC (permalink / raw)
  To: device-mapper development; +Cc: Mike Snitzer, Alasdair G. Kergon

On Mon, Oct 17, 2011 at 12:22:13PM -0400, Mikulas Patocka wrote:
> > thinp uses dm_bufio_release_move() and write_callback.  So yes, this
> > is a bug fix.  I thought so and merged.
> 
> BTW. it still uses the old block number in "prepare_for_write" callback. 
> Do you use this block number somewhere? Should we link the buffer to the 
> new block before the call and the link it back?

Good catch, let me look into it.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 16:24   ` Mikulas Patocka
@ 2011-10-17 16:43     ` Joe Thornber
  0 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 16:43 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer, Alasdair G. Kergon

On Mon, Oct 17, 2011 at 12:24:00PM -0400, Mikulas Patocka wrote:
> On Mon, 17 Oct 2011, Joe Thornber wrote:
> 
> > On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> > > Hi
> > > 
> > > This is a patch for dm-bufio.
> > 
> > I've merged all I'm going to at this point and pushed to thin-dev.
> 
> I think Alasdair currently holds the master version of this code. Or is it 
> you? If we had three versions of the same code and don't know which one is 
> the master, then there is a problem.

I consider my tree to be the master of thin-provisioning, and will be
very annoyed if anything gets pushed to linux-next that hasn't been
sent to me first.  dm-bufio is yours to maintain in the long term -
I'm just taking an interest because it's blocking acceptance of my
code.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH] dm-bufio
  2011-10-17 16:22       ` Mikulas Patocka
  2011-10-17 16:41         ` Joe Thornber
@ 2011-10-17 18:30         ` Joe Thornber
  1 sibling, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 18:30 UTC (permalink / raw)
  To: device-mapper development; +Cc: Mike Snitzer, Alasdair G. Kergon

On Mon, Oct 17, 2011 at 12:22:13PM -0400, Mikulas Patocka wrote:
> > thinp uses dm_bufio_release_move() and write_callback.  So yes, this
> > is a bug fix.  I thought so and merged.
> 
> BTW. it still uses the old block number in "prepare_for_write" callback. 
> Do you use this block number somewhere? Should we link the buffer to the 
> new block before the call and the link it back?

I double checked this; it's fine.  I use release_move as part of the
shadow operation in the transaction manager.  As such we know a write
lock is going to be grabbed on it straight away (which will dirty and
prompt a rewriting of the block nr).  Also in the event of a crash
before the next commit, the new block won't appear in the metadata at
all.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-24 18:51             ` dm-bufio Will Drewry
@ 2012-03-27 10:20               ` Kasatkin, Dmitry
  0 siblings, 0 replies; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-27 10:20 UTC (permalink / raw)
  To: Will Drewry; +Cc: device-mapper development, mpatocka, Zdenek Kabelac

On Sat, Mar 24, 2012 at 8:51 PM, Will Drewry <redpig@dataspill.org> wrote:
> On Fri, Mar 23, 2012 at 8:07 PM, Mikulas Patocka <mpatocka@redhat.com> wrote:
>>
>>
>> On Sat, 24 Mar 2012, Kasatkin, Dmitry wrote:
>>
>>> Hi,
>>>
>>> Thanks for clarification.
>>> Indeed everything works just with dm_bufio_write_dirty_buffers().
>>> Reboot notifier is to issue the flush only..
>>> As I understand, dm-bufio will do the flush but currently once per 10 seconds.
>>>
>>> if data on the block device and metadata on other block device get out
>>> of sync, what you can do then?
>>> how journal helps then?
>>>
>>> - Dmitry
>>
>> It depends what you're trying to do.
>>
>> If you're trying to do something like "dm-verity", but with a possibility
>> to write to the device, there are several possibilities:
>>
>> * keep two checksums per 512-byte sector, the old checksum and the new
>> checksum. If you update the block, you update the new checksum, sync the
>> metadata device and then write to the data device (obviously you need to
>> batch this update-sync-write for many blocks write concurrently to get
>> decent performance). When you verify block, you allow either checksum to
>> match. When you sync caches on the data device, you must forget all the
>> old checksums.
>>
>> * use journaling, write data block and its checksum to a journal. If the
>> computer crashes, you just replay the journal (so you don't care what data
>> was present at that place, you overwrite it with data from the journal).
>> The downside is that this doubles required i/o throughput, you should have
>> journal and data on different devices.
>>
>> * do nothing and rebuild the checksums in case of crash. It is simplest,
>> but it doesn't protect from data damages that happen due to the crash (for
>> example, some SSDs corrupt its metadata on power failure and you can't
>> detect this if you rebuild checksums after a power failure).
>>
>>> Yes.. I am aware of dm-verity target.
>>> It suites well for read-only cases.
>>> It is questionable how tree-based approach will work with read-write.
>>> Each single update will cause whole tree recalculation.
>>
>> A write would recalculate hashes only in the branch from tree bottom to
>> tree top. The obvious downside is that there is no protection from crash.
>
> It also depends on how you plan to assure the integrity of the data:
> Device-based symmetric key, asymmetric key, etc and the costs
> associated.  Local updates make integrity tricky -- will the device
> update itself or will signed updates be supplied, do they need to be
> online, does only a subsection need to be online, etc.
>
> It's likely that the tree updates won't be too expensive compared to
> the crypto and you could attempt to optimize tree updates along a hot
> path if needed (by breaking out hot subdirs to a separate targets) and
> explore other tricks for getting transaction oriented behavior (two
> swapping metadata devices for atomic tree updates, etc).  dm-verity
> was never locked into being a read-only target, but the lack of need
> to support online updates means the code and required changes don't
> exist.
>
> I'm sure any of us involved in dm-verity would be happy to discuss how
> it might be used for your purposes (or if it is really a bad fit),
> etc.
>
> cheers!
> will

Hello,

dm-verity looks extremely nice for read-only targets.
Just one root hash protects whole block device.
But I am interested in writable use case.
It would be nice to understand how it can be addressed as well...

Thanks,
Dmitry

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-24  1:07           ` dm-bufio Mikulas Patocka
  2012-03-24 18:51             ` dm-bufio Will Drewry
@ 2012-03-27  9:56             ` Kasatkin, Dmitry
  1 sibling, 0 replies; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-27  9:56 UTC (permalink / raw)
  To: device-mapper development; +Cc: Zdenek Kabelac

On Sat, Mar 24, 2012 at 3:07 AM, Mikulas Patocka <mpatocka@redhat.com> wrote:
>
>
> On Sat, 24 Mar 2012, Kasatkin, Dmitry wrote:
>
>> Hi,
>>
>> Thanks for clarification.
>> Indeed everything works just with dm_bufio_write_dirty_buffers().
>> Reboot notifier is to issue the flush only..
>> As I understand, dm-bufio will do the flush but currently once per 10 seconds.
>>
>> if data on the block device and metadata on other block device get out
>> of sync, what you can do then?
>> how journal helps then?
>>
>> - Dmitry
>
> It depends what you're trying to do.
>
> If you're trying to do something like "dm-verity", but with a possibility
> to write to the device, there are several possibilities:
>
> * keep two checksums per 512-byte sector, the old checksum and the new
> checksum. If you update the block, you update the new checksum, sync the
> metadata device and then write to the data device (obviously you need to
> batch this update-sync-write for many blocks write concurrently to get
> decent performance). When you verify block, you allow either checksum to
> match. When you sync caches on the data device, you must forget all the
> old checksums.
>

Right.. It requires double the space and more IO.
But it will certainly be more resilient to failures.
What, though, if the data block is corrupted during a write due to power or
other failures?
In such a case, obviously, neither checksum will match...

> * use journaling, write data block and its checksum to a journal. If the
> computer crashes, you just replay the journal (so you don't care what data
> was present at that place, you overwrite it with data from the journal).
> The downside is that this doubles required i/o throughput, you should have
> journal and data on different devices.
>

That looks definitely more reliable.


> * do nothing and rebuild the checksums in case of crash. It is simplest,
> but it doesn't protect from data damages that happen due to the crash (for
> example, some SSDs corrupt its metadata on power failure and you can't
> detect this if you rebuild checksums after a power failure).
>

easy and nice :)

>> Yes.. I am aware of dm-verity target.
>> It suites well for read-only cases.
>> It is questionable how tree-based approach will work with read-write.
>> Each single update will cause whole tree recalculation.
>
> A write would recalculate hashes only in the branch from tree bottom to
> tree top. The obvious downside is that there is no protection from crash.
>

Yes.. I noticed.

>
> BTW. regarding that reboot notifier with
> "dm_bufio_write_dirty_buffers(d->bufio)" ... there could be another
> problem ... what if other reboot notifier (maybe for a completely
> different driver) writes to the device after
> "dm_bufio_write_dirty_buffers(d->bufio)" was performed?
>

The "target" owns and writes to the device...
What other driver would do it?
Also, the rootfs is re-mounted read-only before rebooting.
It is actually not about data device sync, but "hash device" sync.

> - would it be possible to install your notifier again?
>
> - or turn into a synchronous updates? --- i.e. set a flag in your reboot
> notifier and if the flag is on, call
> "dm_bufio_write_dirty_buffers(d->bufio)" after every write.
>

I see the idea.

> Mikulas
>

Thanks,

Dmitry

> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-24  1:07           ` dm-bufio Mikulas Patocka
@ 2012-03-24 18:51             ` Will Drewry
  2012-03-27 10:20               ` dm-bufio Kasatkin, Dmitry
  2012-03-27  9:56             ` dm-bufio Kasatkin, Dmitry
  1 sibling, 1 reply; 51+ messages in thread
From: Will Drewry @ 2012-03-24 18:51 UTC (permalink / raw)
  To: device-mapper development, Kasatkin, Dmitry; +Cc: mpatocka, Zdenek Kabelac

On Fri, Mar 23, 2012 at 8:07 PM, Mikulas Patocka <mpatocka@redhat.com> wrote:
>
>
> On Sat, 24 Mar 2012, Kasatkin, Dmitry wrote:
>
>> Hi,
>>
>> Thanks for clarification.
>> Indeed everything works just with dm_bufio_write_dirty_buffers().
>> Reboot notifier is to issue the flush only..
>> As I understand, dm-bufio will do the flush but currently once per 10 seconds.
>>
>> if data on the block device and metadata on other block device get out
>> of sync, what you can do then?
>> how journal helps then?
>>
>> - Dmitry
>
> It depends what you're trying to do.
>
> If you're trying to do something like "dm-verity", but with a possibility
> to write to the device, there are several possibilities:
>
> * keep two checksums per 512-byte sector, the old checksum and the new
> checksum. If you update the block, you update the new checksum, sync the
> metadata device and then write to the data device (obviously you need to
> batch this update-sync-write for many blocks write concurrently to get
> decent performance). When you verify block, you allow either checksum to
> match. When you sync caches on the data device, you must forget all the
> old checksums.
>
> * use journaling, write data block and its checksum to a journal. If the
> computer crashes, you just replay the journal (so you don't care what data
> was present at that place, you overwrite it with data from the journal).
> The downside is that this doubles required i/o throughput, you should have
> journal and data on different devices.
>
> * do nothing and rebuild the checksums in case of crash. It is simplest,
> but it doesn't protect from data damages that happen due to the crash (for
> example, some SSDs corrupt its metadata on power failure and you can't
> detect this if you rebuild checksums after a power failure).
>
>> Yes.. I am aware of dm-verity target.
>> It suites well for read-only cases.
>> It is questionable how tree-based approach will work with read-write.
>> Each single update will cause whole tree recalculation.
>
> A write would recalculate hashes only in the branch from tree bottom to
> tree top. The obvious downside is that there is no protection from crash.

It also depends on how you plan to assure the integrity of the data:
Device-based symmetric key, asymmetric key, etc and the costs
associated.  Local updates make integrity tricky -- will the device
update itself or will signed updates be supplied, do they need to be
online, does only a subsection need to be online, etc.

It's likely that the tree updates won't be too expensive compared to
the crypto and you could attempt to optimize tree updates along a hot
path if needed (by breaking out hot subdirs to a separate targets) and
explore other tricks for getting transaction oriented behavior (two
swapping metadata devices for atomic tree updates, etc).  dm-verity
was never locked into being a read-only target, but the lack of need
to support online updates means the code and required changes don't
exist.

I'm sure any of us involved in dm-verity would be happy to discuss how
it might be used for your purposes (or if it is really a bad fit),
etc.

cheers!
will

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 23:58         ` dm-bufio Kasatkin, Dmitry
@ 2012-03-24  1:07           ` Mikulas Patocka
  2012-03-24 18:51             ` dm-bufio Will Drewry
  2012-03-27  9:56             ` dm-bufio Kasatkin, Dmitry
  0 siblings, 2 replies; 51+ messages in thread
From: Mikulas Patocka @ 2012-03-24  1:07 UTC (permalink / raw)
  To: Kasatkin, Dmitry; +Cc: device-mapper development, Zdenek Kabelac



On Sat, 24 Mar 2012, Kasatkin, Dmitry wrote:

> Hi,
> 
> Thanks for clarification.
> Indeed everything works just with dm_bufio_write_dirty_buffers().
> Reboot notifier is to issue the flush only..
> As I understand, dm-bufio will do the flush but currently once per 10 seconds.
> 
> if data on the block device and metadata on other block device get out
> of sync, what you can do then?
> how journal helps then?
> 
> - Dmitry

It depends what you're trying to do.

If you're trying to do something like "dm-verity", but with a possibility 
to write to the device, there are several possibilities:

* keep two checksums per 512-byte sector, the old checksum and the new 
checksum. If you update the block, you update the new checksum, sync the 
metadata device and then write to the data device (obviously you need to 
batch this update-sync-write for many blocks written concurrently to get 
decent performance). When you verify block, you allow either checksum to 
match. When you sync caches on the data device, you must forget all the 
old checksums.

* use journaling, write data block and its checksum to a journal. If the 
computer crashes, you just replay the journal (so you don't care what data 
was present at that place, you overwrite it with data from the journal). 
The downside is that this doubles required i/o throughput, you should have 
journal and data on different devices.

* do nothing and rebuild the checksums in case of crash. It is simplest, 
but it doesn't protect from data damages that happen due to the crash (for 
example, some SSDs corrupt their metadata on power failure and you can't 
detect this if you rebuild checksums after a power failure).

> Yes.. I am aware of dm-verity target.
> It suites well for read-only cases.
> It is questionable how tree-based approach will work with read-write.
> Each single update will cause whole tree recalculation.

A write would recalculate hashes only in the branch from tree bottom to 
tree top. The obvious downside is that there is no protection from crash.


BTW. regarding that reboot notifier with 
"dm_bufio_write_dirty_buffers(d->bufio)" ... there could be another 
problem ... what if other reboot notifier (maybe for a completely 
different driver) writes to the device after 
"dm_bufio_write_dirty_buffers(d->bufio)" was performed?

- would it be possible to install your notifier again?

- or turn into a synchronous updates? --- i.e. set a flag in your reboot 
notifier and if the flag is on, call 
"dm_bufio_write_dirty_buffers(d->bufio)" after every write.

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 21:32               ` dm-bufio Mike Snitzer
@ 2012-03-24  0:04                 ` Kasatkin, Dmitry
  0 siblings, 0 replies; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-24  0:04 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: device-mapper development, mpatocka, Zdenek Kabelac

On Fri, Mar 23, 2012 at 11:32 PM, Mike Snitzer <snitzer@redhat.com> wrote:
> On Fri, Mar 23 2012 at 11:17am -0400,
> Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:
>
>> On Fri, Mar 23, 2012 at 4:32 PM, Mike Snitzer <snitzer@redhat.com> wrote:
>> > As an aside, just curious: what does your target do?
>>
>> I have experimented with integrity protection.
>
> Interesting.  Are you aware that a new integrity target has been
developed and is approaching upstream inclusion?  It is called
> dm-verity.
>
> Mikulas posted the most recent version here:
> http://www.redhat.com/archives/dm-devel/2012-March/msg00133.html
>
> And here is the userspace setup code:
> http://www.redhat.com/archives/dm-devel/2012-March/msg00134.html
>
> The original docs from Google's initial dm-verity are mostly applicable
> (but need to be refreshed and included with the new version):
> http://www.redhat.com/archives/dm-devel/2012-February/msg00119.html
>
> dm-verity already has 2 formats, a 3rd could be added if you found the
> existing formats somehow lacking, see:
> http://www.redhat.com/archives/dm-devel/2012-March/msg00143.html
>
> Mike

Hi,

Yes.. I am aware of dm-verity target.
It suits read-only cases well.
It is questionable how a tree-based approach will work with read-write.
Every single update will cause recalculation of the whole tree.

- Dmitry

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 16:22       ` dm-bufio Mikulas Patocka
@ 2012-03-23 23:58         ` Kasatkin, Dmitry
  2012-03-24  1:07           ` dm-bufio Mikulas Patocka
  0 siblings, 1 reply; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-23 23:58 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: device-mapper development, Zdenek Kabelac

On Fri, Mar 23, 2012 at 6:22 PM, Mikulas Patocka <mpatocka@redhat.com> wrote:
>
>
> On Fri, 23 Mar 2012, Kasatkin, Dmitry wrote:
>
>> When using dm-bufio and dm-io in general, how to ensure that all dirty
>
> Regarding dm-io, you don't have to flush dirty buffers because dm-io does
> no caching. You only need to flush hardware disk cache with
> blkdev_issue_flush.
>
>> At the moment, I have reboot notifier which does the following
>>
>>       dm_bufio_write_dirty_buffers(d->bufio);
>>       sync_blockdev(d->dev->bdev);
>>       blkdev_issue_flush(d->dev->bdev, GFP_KERNEL, NULL);
>>
>> without first line on the next boot I got corrupted/not updated blocks.
>> and I am not sure if I need last 2 lines...
>
> You can drop sync_blockdev(d->dev->bdev) --- sync_blockdev flushes kernel
> buffer cache and dm-bufio doesn't use the kernel buffer cache (you only
> need sync_blockdev if you use kernel buffer cache on this device for
> something else). If you drop sync_blockdev, you can also drop
> blkdev_issue_flush, because dm_bufio_write_dirty_buffers already issues
> the flush request.
>
> BTW. how are you going to deal with kernel crashes or power faults?
>
> It is much better to use journaling, phase tree, crash counts or other
> method to maintain metadata integrity insted of reboot notifier --- these
> methods maintain integrity even in case of unexpected crash.
>
> Mikulas
>

Hi,

Thanks for the clarification.
Indeed, everything works with just dm_bufio_write_dirty_buffers().
The reboot notifier is there only to issue the flush..
As I understand it, dm-bufio will do the flush, but currently only once per 10 seconds.

If data on the block device and metadata on another block device get out
of sync, what can you do then?
How does a journal help then?

- Dmitry

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 15:17             ` dm-bufio Kasatkin, Dmitry
@ 2012-03-23 21:32               ` Mike Snitzer
  2012-03-24  0:04                 ` dm-bufio Kasatkin, Dmitry
  0 siblings, 1 reply; 51+ messages in thread
From: Mike Snitzer @ 2012-03-23 21:32 UTC (permalink / raw)
  To: dmitry.kasatkin; +Cc: device-mapper development, mpatocka, Zdenek Kabelac

On Fri, Mar 23 2012 at 11:17am -0400,
Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:

> On Fri, Mar 23, 2012 at 4:32 PM, Mike Snitzer <snitzer@redhat.com> wrote:
> > As an aside, just curious: what does your target do?
> 
> I have experimented with integrity protection.

Interesting.  Are you aware that a new integrity target has been
developed and is approaching upstream inclussion?  It is called
dm-verity.

Mikulas posted the most recent version here:
http://www.redhat.com/archives/dm-devel/2012-March/msg00133.html

And here is the userspace setup code:
http://www.redhat.com/archives/dm-devel/2012-March/msg00134.html

The original docs from Google's initial dm-verity are mostly applicable
(but need to be refreshed and included with the new version):
http://www.redhat.com/archives/dm-devel/2012-February/msg00119.html

dm-verity already has 2 formats, a 3rd could be added if you found the
existing formats somehow lacking, see:
http://www.redhat.com/archives/dm-devel/2012-March/msg00143.html

Mike

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 14:12     ` dm-bufio Kasatkin, Dmitry
  2012-03-23 14:21       ` dm-bufio Mike Snitzer
@ 2012-03-23 16:22       ` Mikulas Patocka
  2012-03-23 23:58         ` dm-bufio Kasatkin, Dmitry
  1 sibling, 1 reply; 51+ messages in thread
From: Mikulas Patocka @ 2012-03-23 16:22 UTC (permalink / raw)
  To: Kasatkin, Dmitry; +Cc: device-mapper development, Zdenek Kabelac



On Fri, 23 Mar 2012, Kasatkin, Dmitry wrote:

> When using dm-bufio and dm-io in general, how to ensure that all dirty

Regarding dm-io, you don't have to flush dirty buffers because dm-io does 
no caching. You only need to flush hardware disk cache with 
blkdev_issue_flush.

> At the moment, I have reboot notifier which does the following
> 
> 	dm_bufio_write_dirty_buffers(d->bufio);
> 	sync_blockdev(d->dev->bdev);
> 	blkdev_issue_flush(d->dev->bdev, GFP_KERNEL, NULL);
> 
> without first line on the next boot I got corrupted/not updated blocks.
> and I am not sure if I need last 2 lines...

You can drop sync_blockdev(d->dev->bdev) --- sync_blockdev flushes kernel 
buffer cache and dm-bufio doesn't use the kernel buffer cache (you only 
need sync_blockdev if you use kernel buffer cache on this device for 
something else). If you drop sync_blockdev, you can also drop 
blkdev_issue_flush, because dm_bufio_write_dirty_buffers already issues 
the flush request.

BTW. how are you going to deal with kernel crashes or power faults?

It is much better to use journaling, phase tree, crash counts or other 
method to maintain metadata integrity instead of a reboot notifier --- these 
methods maintain integrity even in case of unexpected crash.

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 14:32           ` dm-bufio Mike Snitzer
@ 2012-03-23 15:17             ` Kasatkin, Dmitry
  2012-03-23 21:32               ` dm-bufio Mike Snitzer
  0 siblings, 1 reply; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-23 15:17 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: device-mapper development, mpatocka, Zdenek Kabelac

On Fri, Mar 23, 2012 at 4:32 PM, Mike Snitzer <snitzer@redhat.com> wrote:
> On Fri, Mar 23 2012 at 10:29am -0400,
> Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:
>
>> On Fri, Mar 23, 2012 at 4:21 PM, Mike Snitzer <snitzer@redhat.com> wrote:
>> > On Fri, Mar 23 2012 at 10:12am -0400,
>> > Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:
>> >
>> >> On Fri, Mar 23, 2012 at 1:26 PM, Kasatkin, Dmitry
>> >> <dmitry.kasatkin@intel.com> wrote:
>> >> > On Fri, Mar 23, 2012 at 1:10 PM, Zdenek Kabelac <zkabelac@redhat.com> wrote:
>> >> >> Dne 23.3.2012 12:01, Kasatkin, Dmitry napsal(a):
>> >> >>> Hello,
>> >> >>>
>> >> >>> When using dm-bufio and dm-io in general, how to ensure that all dirty
>> >> >>> buffers are written to the storage when machine reboots?
>> >> >>> suspend hooks could be used, but they are not called on reboot, only
>> >> >>> when suspending/removing the target...
>> >> >>>
>> >> >>
>> >> >> You mean you reboot without running  'sync' command?
>> >> >>
>> >> >> And yes - on reboot you should properly unmount devices - so you should
>> >> >> see removal of target on your shutdown sequence -  I believe Fedora currently
>> >> >> tries to support switch to some shutdown ramdisk, so all filesystem and
>> >> >> devices might be properly unmounted and destroyed.
>> >> >>
>> >> >
>> >> > Hello,
>> >> >
>> >> > Thanks for response.
>> >> > I use bufio to store some data on block device.
>> >> > It is not mounted in anyway. My target just use it to load/store data.
>> >> > When machine reboots, I want to be sure that bufio written all dirty buffers...
>> >> >
>> >> > - Dmitry
>> >> >
>> >>
>> >> At the moment, I have reboot notifier which does the following
>> >>
>> >>       dm_bufio_write_dirty_buffers(d->bufio);
>> >>       sync_blockdev(d->dev->bdev);
>> >>       blkdev_issue_flush(d->dev->bdev, GFP_KERNEL, NULL);
>> >>
>> >> without first line on the next boot I got corrupted/not updated blocks.
>> >> and I am not sure if I need last 2 lines...
>> >
>> > Are you cleanly removing the target from the kernel before reboot
>> > (e.g. dmsetup remove devname)?
>> >
>> > As long as your target's .dtr is making sure to flush all outstanding IO
>> > (like your reboot notifier does) you should be fine.
>> >
>>
>> The target contains rootfs... On reboot, it is remounted read-only.
>> I cannot remove it...
>>
>> Sometime ago I had "message" operation "sync", to sync backing devices.
>> But reboot notifier looks nice... It is automatically called.
>
> OK.
>
> As an aside, just curious: what does your target do?

I have experimented with integrity protection.

Thanks.

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 14:29         ` dm-bufio Kasatkin, Dmitry
@ 2012-03-23 14:32           ` Mike Snitzer
  2012-03-23 15:17             ` dm-bufio Kasatkin, Dmitry
  0 siblings, 1 reply; 51+ messages in thread
From: Mike Snitzer @ 2012-03-23 14:32 UTC (permalink / raw)
  To: Kasatkin, Dmitry; +Cc: device-mapper development, mpatocka, Zdenek Kabelac

On Fri, Mar 23 2012 at 10:29am -0400,
Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:

> On Fri, Mar 23, 2012 at 4:21 PM, Mike Snitzer <snitzer@redhat.com> wrote:
> > On Fri, Mar 23 2012 at 10:12am -0400,
> > Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:
> >
> >> On Fri, Mar 23, 2012 at 1:26 PM, Kasatkin, Dmitry
> >> <dmitry.kasatkin@intel.com> wrote:
> >> > On Fri, Mar 23, 2012 at 1:10 PM, Zdenek Kabelac <zkabelac@redhat.com> wrote:
> >> >> Dne 23.3.2012 12:01, Kasatkin, Dmitry napsal(a):
> >> >>> Hello,
> >> >>>
> >> >>> When using dm-bufio and dm-io in general, how to ensure that all dirty
> >> >>> buffers are written to the storage when machine reboots?
> >> >>> suspend hooks could be used, but they are not called on reboot, only
> >> >>> when suspending/removing the target...
> >> >>>
> >> >>
> >> >> You mean you reboot without running  'sync' command?
> >> >>
> >> >> And yes - on reboot you should properly unmount devices - so you should
> >> >> see removal of target on your shutdown sequence -  I believe Fedora currently
> >> >> tries to support switch to some shutdown ramdisk, so all filesystem and
> >> >> devices might be properly unmounted and destroyed.
> >> >>
> >> >
> >> > Hello,
> >> >
> >> > Thanks for response.
> >> > I use bufio to store some data on block device.
> >> > It is not mounted in anyway. My target just use it to load/store data.
> >> > When machine reboots, I want to be sure that bufio written all dirty buffers...
> >> >
> >> > - Dmitry
> >> >
> >>
> >> At the moment, I have reboot notifier which does the following
> >>
> >>       dm_bufio_write_dirty_buffers(d->bufio);
> >>       sync_blockdev(d->dev->bdev);
> >>       blkdev_issue_flush(d->dev->bdev, GFP_KERNEL, NULL);
> >>
> >> without first line on the next boot I got corrupted/not updated blocks.
> >> and I am not sure if I need last 2 lines...
> >
> > Are you cleanly removing the target from the kernel before reboot
> > (e.g. dmsetup remove devname)?
> >
> > As long as your target's .dtr is making sure to flush all outstanding IO
> > (like your reboot notifier does) you should be fine.
> >
> 
> The target contains rootfs... On reboot, it is remounted read-only.
> I cannot remove it...
> 
> Sometime ago I had "message" operation "sync", to sync backing devices.
> But reboot notifier looks nice... It is automatically called.

OK.

As an aside, just curious: what does your target do?

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 14:21       ` dm-bufio Mike Snitzer
@ 2012-03-23 14:29         ` Kasatkin, Dmitry
  2012-03-23 14:32           ` dm-bufio Mike Snitzer
  0 siblings, 1 reply; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-23 14:29 UTC (permalink / raw)
  To: device-mapper development; +Cc: mpatocka, Zdenek Kabelac

On Fri, Mar 23, 2012 at 4:21 PM, Mike Snitzer <snitzer@redhat.com> wrote:
> On Fri, Mar 23 2012 at 10:12am -0400,
> Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:
>
>> On Fri, Mar 23, 2012 at 1:26 PM, Kasatkin, Dmitry
>> <dmitry.kasatkin@intel.com> wrote:
>> > On Fri, Mar 23, 2012 at 1:10 PM, Zdenek Kabelac <zkabelac@redhat.com> wrote:
>> >> Dne 23.3.2012 12:01, Kasatkin, Dmitry napsal(a):
>> >>> Hello,
>> >>>
>> >>> When using dm-bufio and dm-io in general, how to ensure that all dirty
>> >>> buffers are written to the storage when machine reboots?
>> >>> suspend hooks could be used, but they are not called on reboot, only
>> >>> when suspending/removing the target...
>> >>>
>> >>
>> >> You mean you reboot without running  'sync' command?
>> >>
>> >> And yes - on reboot you should properly unmount devices - so you should
>> >> see removal of target on your shutdown sequence -  I believe Fedora currently
>> >> tries to support switch to some shutdown ramdisk, so all filesystem and
>> >> devices might be properly unmounted and destroyed.
>> >>
>> >
>> > Hello,
>> >
>> > Thanks for response.
>> > I use bufio to store some data on block device.
>> > It is not mounted in anyway. My target just use it to load/store data.
>> > When machine reboots, I want to be sure that bufio written all dirty buffers...
>> >
>> > - Dmitry
>> >
>>
>> At the moment, I have reboot notifier which does the following
>>
>>       dm_bufio_write_dirty_buffers(d->bufio);
>>       sync_blockdev(d->dev->bdev);
>>       blkdev_issue_flush(d->dev->bdev, GFP_KERNEL, NULL);
>>
>> without first line on the next boot I got corrupted/not updated blocks.
>> and I am not sure if I need last 2 lines...
>
> Are you cleanly removing the target from the kernel before reboot
> (e.g. dmsetup remove devname)?
>
> As long as your target's .dtr is making sure to flush all outstanding IO
> (like your reboot notifier does) you should be fine.
>

The target contains rootfs... On reboot, it is remounted read-only.
I cannot remove it...

Sometime ago I had "message" operation "sync", to sync backing devices.
But reboot notifier looks nice... It is automatically called.

> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 14:12     ` dm-bufio Kasatkin, Dmitry
@ 2012-03-23 14:21       ` Mike Snitzer
  2012-03-23 14:29         ` dm-bufio Kasatkin, Dmitry
  2012-03-23 16:22       ` dm-bufio Mikulas Patocka
  1 sibling, 1 reply; 51+ messages in thread
From: Mike Snitzer @ 2012-03-23 14:21 UTC (permalink / raw)
  To: Kasatkin, Dmitry; +Cc: device-mapper development, mpatocka, Zdenek Kabelac

On Fri, Mar 23 2012 at 10:12am -0400,
Kasatkin, Dmitry <dmitry.kasatkin@intel.com> wrote:

> On Fri, Mar 23, 2012 at 1:26 PM, Kasatkin, Dmitry
> <dmitry.kasatkin@intel.com> wrote:
> > On Fri, Mar 23, 2012 at 1:10 PM, Zdenek Kabelac <zkabelac@redhat.com> wrote:
> >> Dne 23.3.2012 12:01, Kasatkin, Dmitry napsal(a):
> >>> Hello,
> >>>
> >>> When using dm-bufio and dm-io in general, how to ensure that all dirty
> >>> buffers are written to the storage when machine reboots?
> >>> suspend hooks could be used, but they are not called on reboot, only
> >>> when suspending/removing the target...
> >>>
> >>
> >> You mean you reboot without running  'sync' command?
> >>
> >> And yes - on reboot you should properly unmount devices - so you should
> >> see removal of target on your shutdown sequence -  I believe Fedora currently
> >> tries to support switch to some shutdown ramdisk, so all filesystem and
> >> devices might be properly unmounted and destroyed.
> >>
> >
> > Hello,
> >
> > Thanks for response.
> > I use bufio to store some data on block device.
> > It is not mounted in anyway. My target just use it to load/store data.
> > When machine reboots, I want to be sure that bufio written all dirty buffers...
> >
> > - Dmitry
> >
> 
> At the moment, I have reboot notifier which does the following
> 
> 	dm_bufio_write_dirty_buffers(d->bufio);
> 	sync_blockdev(d->dev->bdev);
> 	blkdev_issue_flush(d->dev->bdev, GFP_KERNEL, NULL);
> 
> without first line on the next boot I got corrupted/not updated blocks.
> and I am not sure if I need last 2 lines...

Are you cleanly removing the target from the kernel before reboot
(e.g. dmsetup remove devname)?

As long as your target's .dtr is making sure to flush all outstanding IO
(like your reboot notifier does) you should be fine.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 11:26   ` dm-bufio Kasatkin, Dmitry
@ 2012-03-23 14:12     ` Kasatkin, Dmitry
  2012-03-23 14:21       ` dm-bufio Mike Snitzer
  2012-03-23 16:22       ` dm-bufio Mikulas Patocka
  0 siblings, 2 replies; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-23 14:12 UTC (permalink / raw)
  To: Zdenek Kabelac; +Cc: device-mapper development, mpatocka

On Fri, Mar 23, 2012 at 1:26 PM, Kasatkin, Dmitry
<dmitry.kasatkin@intel.com> wrote:
> On Fri, Mar 23, 2012 at 1:10 PM, Zdenek Kabelac <zkabelac@redhat.com> wrote:
>> Dne 23.3.2012 12:01, Kasatkin, Dmitry napsal(a):
>>> Hello,
>>>
>>> When using dm-bufio and dm-io in general, how to ensure that all dirty
>>> buffers are written to the storage when machine reboots?
>>> suspend hooks could be used, but they are not called on reboot, only
>>> when suspending/removing the target...
>>>
>>
>> You mean you reboot without running  'sync' command?
>>
>> And yes - on reboot you should properly unmount devices - so you should
>> see removal of target on your shutdown sequence -  I believe Fedora currently
>> tries to support switch to some shutdown ramdisk, so all filesystem and
>> devices might be properly unmounted and destroyed.
>>
>
> Hello,
>
> Thanks for response.
> I use bufio to store some data on block device.
> It is not mounted in anyway. My target just use it to load/store data.
> When machine reboots, I want to be sure that bufio written all dirty buffers...
>
> - Dmitry
>

At the moment, I have reboot notifier which does the following

	dm_bufio_write_dirty_buffers(d->bufio);
	sync_blockdev(d->dev->bdev);
	blkdev_issue_flush(d->dev->bdev, GFP_KERNEL, NULL);

without first line on the next boot I got corrupted/not updated blocks.
and I am not sure if I need last 2 lines...

- Dmitry

>
>> Zdenek
>>
>>

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 11:10 ` dm-bufio Zdenek Kabelac
@ 2012-03-23 11:26   ` Kasatkin, Dmitry
  2012-03-23 14:12     ` dm-bufio Kasatkin, Dmitry
  0 siblings, 1 reply; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-23 11:26 UTC (permalink / raw)
  To: Zdenek Kabelac; +Cc: device-mapper development, mpatocka

On Fri, Mar 23, 2012 at 1:10 PM, Zdenek Kabelac <zkabelac@redhat.com> wrote:
> Dne 23.3.2012 12:01, Kasatkin, Dmitry napsal(a):
>> Hello,
>>
>> When using dm-bufio and dm-io in general, how to ensure that all dirty
>> buffers are written to the storage when machine reboots?
>> suspend hooks could be used, but they are not called on reboot, only
>> when suspending/removing the target...
>>
>
> You mean you reboot without running  'sync' command?
>
> And yes - on reboot you should properly unmount devices - so you should
> see removal of target on your shutdown sequence -  I believe Fedora currently
> tries to support switch to some shutdown ramdisk, so all filesystem and
> devices might be properly unmounted and destroyed.
>

Hello,

Thanks for response.
I use bufio to store some data on block device.
It is not mounted in any way. My target just uses it to load/store data.
When the machine reboots, I want to be sure that bufio has written all dirty buffers...

- Dmitry


> Zdenek
>
>

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2012-03-23 11:01 dm-bufio Kasatkin, Dmitry
@ 2012-03-23 11:10 ` Zdenek Kabelac
  2012-03-23 11:26   ` dm-bufio Kasatkin, Dmitry
  0 siblings, 1 reply; 51+ messages in thread
From: Zdenek Kabelac @ 2012-03-23 11:10 UTC (permalink / raw)
  To: device-mapper development; +Cc: Kasatkin, Dmitry, mpatocka

Dne 23.3.2012 12:01, Kasatkin, Dmitry napsal(a):
> Hello,
> 
> When using dm-bufio and dm-io in general, how to ensure that all dirty
> buffers are written to the storage when machine reboots?
> suspend hooks could be used, but they are not called on reboot, only
> when suspending/removing the target...
> 

You mean you reboot without running  'sync' command?

And yes - on reboot you should properly unmount devices - so you should
see removal of target on your shutdown sequence -  I believe Fedora currently
tries to support switch to some shutdown ramdisk, so all filesystem and
devices might be properly unmounted and destroyed.

Zdenek

^ permalink raw reply	[flat|nested] 51+ messages in thread

* dm-bufio
@ 2012-03-23 11:01 Kasatkin, Dmitry
  2012-03-23 11:10 ` dm-bufio Zdenek Kabelac
  0 siblings, 1 reply; 51+ messages in thread
From: Kasatkin, Dmitry @ 2012-03-23 11:01 UTC (permalink / raw)
  To: dm-devel; +Cc: mpatocka

Hello,

When using dm-bufio and dm-io in general, how to ensure that all dirty
buffers are written to the storage when machine reboots?
suspend hooks could be used, but they are not called on reboot, only
when suspending/removing the target...


Thanks,
Dmitry

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-17 16:49   ` dm-bufio Mikulas Patocka
@ 2011-10-17 18:01     ` Joe Thornber
  0 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-17 18:01 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Alasdair G. Kergon

On Mon, Oct 17, 2011 at 12:49:22PM -0400, Mikulas Patocka wrote:
> If you want to do it, just do it, but it makes no sense to me.

Well Alasdair, Mike and myself have all looked at this code at various
times over the last 18 months and come to similar conclusions.  We
find the following logic difficult with all those gotos.  There is
also some redundancy which I only noticed when trying to refactor it
(repeated checks).  There's an example that has a jump from one branch
of an 'if' to another, it's trivial to factor out into a function for
each branch, yet you insist on arguing and refuse to change it.  I'm
perfectly prepared to accept that you're familiar enough with this
code, or just clever enough for it to be transparent to you.  But it's
the sort of thing that's going to get picked up on by other small
brained developers like me.  Choose your battles wisely, and save your
energies for something more interesting.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-14  9:15 ` dm-bufio Joe Thornber
  2011-10-14 12:19   ` dm-bufio Joe Thornber
@ 2011-10-17 16:49   ` Mikulas Patocka
  2011-10-17 18:01     ` dm-bufio Joe Thornber
  1 sibling, 1 reply; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-17 16:49 UTC (permalink / raw)
  To: Joe Thornber; +Cc: dm-devel, Alasdair G. Kergon



On Fri, 14 Oct 2011, Joe Thornber wrote:

> On Thu, Oct 13, 2011 at 11:05:51AM -0400, Mikulas Patocka wrote:
> > Hi
> > 
> > I found a way how to shut up lockdep warnings in dm-bufio, so you can 
> > download new a version which uses nested locks from 
> > http://people.redhat.com/mpatocka/patches/kernel/dm-thinp-bufio/
> 
> Thanks Mikulas, I'd had a couple of people ask me about these warnings.
> 
> > BTW. why did you move dm-bufio to persistent-data directory? What are 
> > other dm-bufio users (such as dm-zeroed or dm-multisnap) going to do? I 
> > thought dm-bufio should be a separate module available to all device 
> > mapper targets, such as dm-io, dm-kcopyd or so?
> 
> agk made it clear he wasn't going to merge bufio when I met with him a
> couple of weeks ago.  So to try and speed things up I put my 'agk' hat
> on and tidied as I thought he would.  The main things I did were
> remove the gratuitous gotos (eg, jumping from one branch of an if to
> another).

My point is that - to get understandable code, each function should do a 
task that can be easily and simply described. It is not necessary that the 
function body is small or that it doesn't contain gotos, what is required 
is that the description of a function is small.

If you replace random parts of a function body with a call to another 
function in order to eliminate a goto, it doesn't improve readability of 
the code at all. Instead of a goto you get a call - both are control 
transfer constructs and a call is not necessarily easier to follow than a 
goto (unless the condition in the first paragraph is true) - as it can be 
seen that both you and me made some bugs on those new 
goto-replaced-with-call functions.

If you want to do it, just do it, but it makes no sense to me.

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-14  9:15 ` dm-bufio Joe Thornber
@ 2011-10-14 12:19   ` Joe Thornber
  2011-10-17 16:49   ` dm-bufio Mikulas Patocka
  1 sibling, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-14 12:19 UTC (permalink / raw)
  To: Mikulas Patocka, Alasdair G. Kergon, dm-devel

On Fri, Oct 14, 2011 at 10:15:38AM +0100, Joe Thornber wrote:
> I'm not sure moving dm-bufio to pdata was the right thing to do.

It's back in drivers/md.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-13 22:31     ` dm-bufio Mike Snitzer
@ 2011-10-14  9:19       ` Joe Thornber
  0 siblings, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-14  9:19 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel, Mikulas Patocka, Alasdair G. Kergon

On Thu, Oct 13, 2011 at 06:31:47PM -0400, Mike Snitzer wrote:
> On Thu, Oct 13 2011 at  6:19pm -0400,
> Mikulas Patocka <mpatocka@redhat.com> wrote:
> 
> > > +static int dm_bufio_trylock(struct dm_bufio_client *c)
> > > +{
> > > +	return dm_bufio_trylock(c);
> > > +}
> > > +
> > > +static void dm_bufio_unlock(struct dm_bufio_client *c)
> > > +{
> > > +	dm_bufio_unlock(c);
> > > +}
> > 
> > These two functions are recursive nonsense :) There should be
> > "return mutex_trylock(&c->lock);" and "mutex_unlock(&c->lock);".
> 
> Heheh.  Sorry, M-x replace-string gone wild.
> 
> Here is a fixed version.

Thanks Mike.  If you haven't tested something can you please say, so I
know to run it through the full suite?

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-13 15:05 dm-bufio Mikulas Patocka
  2011-10-13 19:40 ` dm-bufio Mike Snitzer
@ 2011-10-14  9:15 ` Joe Thornber
  2011-10-14 12:19   ` dm-bufio Joe Thornber
  2011-10-17 16:49   ` dm-bufio Mikulas Patocka
  1 sibling, 2 replies; 51+ messages in thread
From: Joe Thornber @ 2011-10-14  9:15 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Alasdair G. Kergon

On Thu, Oct 13, 2011 at 11:05:51AM -0400, Mikulas Patocka wrote:
> Hi
> 
> I found a way how to shut up lockdep warnings in dm-bufio, so you can 
> download new a version which uses nested locks from 
> http://people.redhat.com/mpatocka/patches/kernel/dm-thinp-bufio/

Thanks Mikulas, I'd had a couple of people ask me about these warnings.

> BTW. why did you move dm-bufio to persistent-data directory? What are 
> other dm-bufio users (such as dm-zeroed or dm-multisnap) going to do? I 
> thought dm-bufio should be a separate module available to all device 
> mapper targets, such as dm-io, dm-kcopyd or so?

agk made it clear he wasn't going to merge bufio when I met with him a
couple of weeks ago.  So to try and speed things up I put my 'agk' hat
on and tidied as I thought he would.  The main things I did were
remove the gratuitous gotos (eg, jumping from one branch of an if to
another).

I'm not sure moving dm-bufio to pdata was the right thing to do.  My
rationale was there's only one user atm, so put it in there.  I don't
think the header should be in include/linux, this is not going to be
used outside dm (dm-io.h shouldn't be either).

This isn't set in stone, take this up with agk.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-13 22:19   ` dm-bufio Mikulas Patocka
@ 2011-10-13 22:31     ` Mike Snitzer
  2011-10-14  9:19       ` dm-bufio Joe Thornber
  0 siblings, 1 reply; 51+ messages in thread
From: Mike Snitzer @ 2011-10-13 22:31 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Edward Thornber, Alasdair G. Kergon

On Thu, Oct 13 2011 at  6:19pm -0400,
Mikulas Patocka <mpatocka@redhat.com> wrote:

> > +static int dm_bufio_trylock(struct dm_bufio_client *c)
> > +{
> > +	return dm_bufio_trylock(c);
> > +}
> > +
> > +static void dm_bufio_unlock(struct dm_bufio_client *c)
> > +{
> > +	dm_bufio_unlock(c);
> > +}
> 
> These two functions are recursive nonsense :) There should be
> "return mutex_trylock(&c->lock);" and "mutex_unlock(&c->lock);".

Heheh.  Sorry, M-x replace-string gone wild.

Here is a fixed version.

---
 drivers/md/persistent-data/dm-bufio.c |   71 ++++++++++++++++++++++-----------
 1 files changed, 48 insertions(+), 23 deletions(-)

diff --git a/drivers/md/persistent-data/dm-bufio.c b/drivers/md/persistent-data/dm-bufio.c
index 6be4386..e7d1aac 100644
--- a/drivers/md/persistent-data/dm-bufio.c
+++ b/drivers/md/persistent-data/dm-bufio.c
@@ -151,6 +151,23 @@ static inline int dm_bufio_cache_index(struct dm_bufio_client *c)
 #define DM_BUFIO_CACHE(c)	(dm_bufio_caches[dm_bufio_cache_index(c)])
 #define DM_BUFIO_CACHE_NAME(c)	(dm_bufio_cache_names[dm_bufio_cache_index(c)])
 
+#define dm_bufio_in_request()	(!!current->bio_list)
+
+static void dm_bufio_lock(struct dm_bufio_client *c)
+{
+	mutex_lock_nested(&c->lock, dm_bufio_in_request());
+}
+
+static int dm_bufio_trylock(struct dm_bufio_client *c)
+{
+	return mutex_trylock(&c->lock);
+}
+
+static void dm_bufio_unlock(struct dm_bufio_client *c)
+{
+	mutex_unlock(&c->lock);
+}
+
 /*----------------------------------------------------------------*/
 
 /* Default cache size --- available memory divided by the ratio */
@@ -595,14 +612,14 @@ static void __wait_for_free_buffer(struct dm_bufio_client *c)
 
 	add_wait_queue(&c->free_buffer_wait, &wait);
 	set_task_state(current, TASK_UNINTERRUPTIBLE);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	io_schedule();
 
 	set_task_state(current, TASK_RUNNING);
 	remove_wait_queue(&c->free_buffer_wait, &wait);
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 }
 
 /*
@@ -836,9 +853,9 @@ static void *new_read(struct dm_bufio_client *c, sector_t block, enum new_flag n
 	int need_submit;
 	struct dm_buffer *b;
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	b = __bufio_new(c, block, nf, bp, &need_submit);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	if (!b || IS_ERR(b))
 		return b;
@@ -867,19 +884,21 @@ void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
 void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
 		    struct dm_buffer **bp)
 {
+	BUG_ON(dm_bufio_in_request());
 	return new_read(c, block, NF_READ, bp);
 }
 
 void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
 		   struct dm_buffer **bp)
 {
+	BUG_ON(dm_bufio_in_request());
 	return new_read(c, block, NF_FRESH, bp);
 }
 
 void dm_bufio_release(struct dm_buffer *b)
 {
 	struct dm_bufio_client *c = b->c;
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	BUG_ON(test_bit(B_READING, &b->state));
 	BUG_ON(!b->hold_count);
 	b->hold_count--;
@@ -898,26 +917,27 @@ void dm_bufio_release(struct dm_buffer *b)
 			__free_buffer_wake(b);
 		}
 	}
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
 {
 	struct dm_bufio_client *c = b->c;
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 
 	if (!test_and_set_bit(B_DIRTY, &b->state))
 		__relink_lru(b, LIST_DIRTY);
 
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
 {
-	mutex_lock(&c->lock);
+	BUG_ON(dm_bufio_in_request());
+	dm_bufio_lock(c);
 	__write_dirty_buffers_async(c, 0);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 /*
@@ -933,7 +953,7 @@ int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
 	unsigned long buffers_processed = 0;
 	struct dm_buffer *b, *tmp;
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	__write_dirty_buffers_async(c, 0);
 
 again:
@@ -947,10 +967,10 @@ again:
 			if (buffers_processed < c->n_buffers[LIST_DIRTY]) {
 				dropped_lock = 1;
 				b->hold_count++;
-				mutex_unlock(&c->lock);
+				dm_bufio_unlock(c);
 				wait_on_bit(&b->state, B_WRITING,
 					    do_io_schedule, TASK_UNINTERRUPTIBLE);
-				mutex_lock(&c->lock);
+				dm_bufio_lock(c);
 				b->hold_count--;
 			} else
 				wait_on_bit(&b->state, B_WRITING,
@@ -978,7 +998,7 @@ again:
 			goto again;
 	}
 	wake_up(&c->free_buffer_wait);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	a = xchg(&c->async_write_error, 0);
 	f = dm_bufio_issue_flush(c);
@@ -1003,6 +1023,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c)
 		.sector = 0,
 		.count = 0,
 	};
+	BUG_ON(dm_bufio_in_request());
 	return dm_io(&io_req, 1, &io_reg, NULL);
 }
 
@@ -1023,7 +1044,9 @@ void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block)
 	struct dm_bufio_client *c = b->c;
 	struct dm_buffer *new;
 
-	mutex_lock(&c->lock);
+	BUG_ON(dm_bufio_in_request());
+
+	dm_bufio_lock(c);
 
 retry:
 	new = __find(c, new_block);
@@ -1054,7 +1077,7 @@ retry:
 		wait_on_bit(&b->state, B_WRITING,
 			    do_io_schedule, TASK_UNINTERRUPTIBLE);
 	}
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 	dm_bufio_release(b);
 }
 
@@ -1094,10 +1117,12 @@ static void drop_buffers(struct dm_bufio_client *c)
 	struct dm_buffer *b;
 	int i;
 
+	BUG_ON(dm_bufio_in_request());
+
 	/* an optimization ... so that the buffers are not written one-by-one */
 	dm_bufio_write_dirty_buffers_async(c);
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	while ((b = __get_unclaimed_buffer(c)))
 		__free_buffer_wake(b);
 
@@ -1110,7 +1135,7 @@ static void drop_buffers(struct dm_bufio_client *c)
 	for (i = 0; i < LIST_N; i++)
 		BUG_ON(!list_empty(&c->lru[i]));
 
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 /*
@@ -1166,9 +1191,9 @@ static int shrink(struct shrinker *shrinker, struct shrink_control *sc)
 	unsigned long nr_to_scan = sc->nr_to_scan;
 
 	if (sc->gfp_mask & __GFP_IO) {
-		mutex_lock(&c->lock);
+		dm_bufio_lock(c);
 	} else {
-		if (!mutex_trylock(&c->lock))
+		if (!dm_bufio_trylock(c))
 			return !nr_to_scan ? 0 : -1;
 	}
 
@@ -1179,7 +1204,7 @@ static int shrink(struct shrinker *shrinker, struct shrink_control *sc)
 	if (r > INT_MAX)
 		r = INT_MAX;
 
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	return r;
 }
@@ -1356,7 +1381,7 @@ static void cleanup_old_buffers(void)
 
 	mutex_lock(&dm_bufio_clients_lock);
 	list_for_each_entry(c, &dm_bufio_all_clients, client_list) {
-		if (!mutex_trylock(&c->lock))
+		if (!dm_bufio_trylock(c))
 			continue;
 
 		while (!list_empty(&c->lru[LIST_CLEAN])) {
@@ -1367,7 +1392,7 @@ static void cleanup_old_buffers(void)
 				break;
 		}
 
-		mutex_unlock(&c->lock);
+		dm_bufio_unlock(c);
 	}
 	mutex_unlock(&dm_bufio_clients_lock);
 }

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-13 19:40 ` dm-bufio Mike Snitzer
@ 2011-10-13 22:19   ` Mikulas Patocka
  2011-10-13 22:31     ` dm-bufio Mike Snitzer
  0 siblings, 1 reply; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-13 22:19 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel, Edward Thornber, Alasdair G. Kergon

> +static int dm_bufio_trylock(struct dm_bufio_client *c)
> +{
> +	return dm_bufio_trylock(c);
> +}
> +
> +static void dm_bufio_unlock(struct dm_bufio_client *c)
> +{
> +	dm_bufio_unlock(c);
> +}

These two functions are recursive nonsense :) There should be
"return mutex_trylock(&c->lock);" and "mutex_unlock(&c->lock);".

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-10-13 15:05 dm-bufio Mikulas Patocka
@ 2011-10-13 19:40 ` Mike Snitzer
  2011-10-13 22:19   ` dm-bufio Mikulas Patocka
  2011-10-14  9:15 ` dm-bufio Joe Thornber
  1 sibling, 1 reply; 51+ messages in thread
From: Mike Snitzer @ 2011-10-13 19:40 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Edward Thornber, Alasdair G. Kergon

On Thu, Oct 13 2011 at 11:05am -0400,
Mikulas Patocka <mpatocka@redhat.com> wrote:

> Hi
> 
> I found a way how to shut up lockdep warnings in dm-bufio, so you can 
> download new a version which uses nested locks from 
> http://people.redhat.com/mpatocka/patches/kernel/dm-thinp-bufio/
> 
> BTW. why did you move dm-bufio to persistent-data directory? What are 
> other dm-bufio users (such as dm-zeroed or dm-multisnap) going to do? I 
> thought dm-bufio should be a separate module available to all device 
> mapper targets, such as dm-io, dm-kcopyd or so?

Here is a patch with your nested lock fix that applies to Joe's thin-dev
branch (note that the dm-bufio in thin-dev branch has been cleaned up
some, so this took a bit of care; Mikulas, it would be nice if you reviewed
those changes):

 drivers/md/persistent-data/dm-bufio.c |   71 ++++++++++++++++++++++-----------
 1 files changed, 48 insertions(+), 23 deletions(-)

diff --git a/drivers/md/persistent-data/dm-bufio.c b/drivers/md/persistent-data/dm-bufio.c
index 6be4386..e7d1aac 100644
--- a/drivers/md/persistent-data/dm-bufio.c
+++ b/drivers/md/persistent-data/dm-bufio.c
@@ -151,6 +151,23 @@ static inline int dm_bufio_cache_index(struct dm_bufio_client *c)
 #define DM_BUFIO_CACHE(c)	(dm_bufio_caches[dm_bufio_cache_index(c)])
 #define DM_BUFIO_CACHE_NAME(c)	(dm_bufio_cache_names[dm_bufio_cache_index(c)])
 
+#define dm_bufio_in_request()	(!!current->bio_list)
+
+static void dm_bufio_lock(struct dm_bufio_client *c)
+{
+	mutex_lock_nested(&c->lock, dm_bufio_in_request());
+}
+
+static int dm_bufio_trylock(struct dm_bufio_client *c)
+{
+	return dm_bufio_trylock(c);
+}
+
+static void dm_bufio_unlock(struct dm_bufio_client *c)
+{
+	dm_bufio_unlock(c);
+}
+
 /*----------------------------------------------------------------*/
 
 /* Default cache size --- available memory divided by the ratio */
@@ -595,14 +612,14 @@ static void __wait_for_free_buffer(struct dm_bufio_client *c)
 
 	add_wait_queue(&c->free_buffer_wait, &wait);
 	set_task_state(current, TASK_UNINTERRUPTIBLE);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	io_schedule();
 
 	set_task_state(current, TASK_RUNNING);
 	remove_wait_queue(&c->free_buffer_wait, &wait);
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 }
 
 /*
@@ -836,9 +853,9 @@ static void *new_read(struct dm_bufio_client *c, sector_t block, enum new_flag n
 	int need_submit;
 	struct dm_buffer *b;
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	b = __bufio_new(c, block, nf, bp, &need_submit);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	if (!b || IS_ERR(b))
 		return b;
@@ -867,19 +884,21 @@ void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
 void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
 		    struct dm_buffer **bp)
 {
+	BUG_ON(dm_bufio_in_request());
 	return new_read(c, block, NF_READ, bp);
 }
 
 void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
 		   struct dm_buffer **bp)
 {
+	BUG_ON(dm_bufio_in_request());
 	return new_read(c, block, NF_FRESH, bp);
 }
 
 void dm_bufio_release(struct dm_buffer *b)
 {
 	struct dm_bufio_client *c = b->c;
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	BUG_ON(test_bit(B_READING, &b->state));
 	BUG_ON(!b->hold_count);
 	b->hold_count--;
@@ -898,26 +917,27 @@ void dm_bufio_release(struct dm_buffer *b)
 			__free_buffer_wake(b);
 		}
 	}
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
 {
 	struct dm_bufio_client *c = b->c;
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 
 	if (!test_and_set_bit(B_DIRTY, &b->state))
 		__relink_lru(b, LIST_DIRTY);
 
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
 {
-	mutex_lock(&c->lock);
+	BUG_ON(dm_bufio_in_request());
+	dm_bufio_lock(c);
 	__write_dirty_buffers_async(c, 0);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 /*
@@ -933,7 +953,7 @@ int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
 	unsigned long buffers_processed = 0;
 	struct dm_buffer *b, *tmp;
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	__write_dirty_buffers_async(c, 0);
 
 again:
@@ -947,10 +967,10 @@ again:
 			if (buffers_processed < c->n_buffers[LIST_DIRTY]) {
 				dropped_lock = 1;
 				b->hold_count++;
-				mutex_unlock(&c->lock);
+				dm_bufio_unlock(c);
 				wait_on_bit(&b->state, B_WRITING,
 					    do_io_schedule, TASK_UNINTERRUPTIBLE);
-				mutex_lock(&c->lock);
+				dm_bufio_lock(c);
 				b->hold_count--;
 			} else
 				wait_on_bit(&b->state, B_WRITING,
@@ -978,7 +998,7 @@ again:
 			goto again;
 	}
 	wake_up(&c->free_buffer_wait);
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	a = xchg(&c->async_write_error, 0);
 	f = dm_bufio_issue_flush(c);
@@ -1003,6 +1023,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c)
 		.sector = 0,
 		.count = 0,
 	};
+	BUG_ON(dm_bufio_in_request());
 	return dm_io(&io_req, 1, &io_reg, NULL);
 }
 
@@ -1023,7 +1044,9 @@ void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block)
 	struct dm_bufio_client *c = b->c;
 	struct dm_buffer *new;
 
-	mutex_lock(&c->lock);
+	BUG_ON(dm_bufio_in_request());
+
+	dm_bufio_lock(c);
 
 retry:
 	new = __find(c, new_block);
@@ -1054,7 +1077,7 @@ retry:
 		wait_on_bit(&b->state, B_WRITING,
 			    do_io_schedule, TASK_UNINTERRUPTIBLE);
 	}
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 	dm_bufio_release(b);
 }
 
@@ -1094,10 +1117,12 @@ static void drop_buffers(struct dm_bufio_client *c)
 	struct dm_buffer *b;
 	int i;
 
+	BUG_ON(dm_bufio_in_request());
+
 	/* an optimization ... so that the buffers are not written one-by-one */
 	dm_bufio_write_dirty_buffers_async(c);
 
-	mutex_lock(&c->lock);
+	dm_bufio_lock(c);
 	while ((b = __get_unclaimed_buffer(c)))
 		__free_buffer_wake(b);
 
@@ -1110,7 +1135,7 @@ static void drop_buffers(struct dm_bufio_client *c)
 	for (i = 0; i < LIST_N; i++)
 		BUG_ON(!list_empty(&c->lru[i]));
 
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 }
 
 /*
@@ -1166,9 +1191,9 @@ static int shrink(struct shrinker *shrinker, struct shrink_control *sc)
 	unsigned long nr_to_scan = sc->nr_to_scan;
 
 	if (sc->gfp_mask & __GFP_IO) {
-		mutex_lock(&c->lock);
+		dm_bufio_lock(c);
 	} else {
-		if (!mutex_trylock(&c->lock))
+		if (!dm_bufio_trylock(c))
 			return !nr_to_scan ? 0 : -1;
 	}
 
@@ -1179,7 +1204,7 @@ static int shrink(struct shrinker *shrinker, struct shrink_control *sc)
 	if (r > INT_MAX)
 		r = INT_MAX;
 
-	mutex_unlock(&c->lock);
+	dm_bufio_unlock(c);
 
 	return r;
 }
@@ -1356,7 +1381,7 @@ static void cleanup_old_buffers(void)
 
 	mutex_lock(&dm_bufio_clients_lock);
 	list_for_each_entry(c, &dm_bufio_all_clients, client_list) {
-		if (!mutex_trylock(&c->lock))
+		if (!dm_bufio_trylock(c))
 			continue;
 
 		while (!list_empty(&c->lru[LIST_CLEAN])) {
@@ -1367,7 +1392,7 @@ static void cleanup_old_buffers(void)
 				break;
 		}
 
-		mutex_unlock(&c->lock);
+		dm_bufio_unlock(c);
 	}
 	mutex_unlock(&dm_bufio_clients_lock);
 }

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* dm-bufio
@ 2011-10-13 15:05 Mikulas Patocka
  2011-10-13 19:40 ` dm-bufio Mike Snitzer
  2011-10-14  9:15 ` dm-bufio Joe Thornber
  0 siblings, 2 replies; 51+ messages in thread
From: Mikulas Patocka @ 2011-10-13 15:05 UTC (permalink / raw)
  To: Edward Thornber; +Cc: dm-devel, Alasdair G. Kergon

Hi

I found a way to silence the lockdep warnings in dm-bufio, so you can 
download a new version which uses nested locks from 
http://people.redhat.com/mpatocka/patches/kernel/dm-thinp-bufio/

BTW, why did you move dm-bufio to the persistent-data directory? What are 
other dm-bufio users (such as dm-zeroed or dm-multisnap) going to do? I 
thought dm-bufio should be a separate module available to all device 
mapper targets, just like dm-io or dm-kcopyd?

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-09-27 17:20 dm-bufio Mikulas Patocka
  2011-09-28  9:41 ` dm-bufio Joe Thornber
@ 2011-09-28  9:51 ` Heinz Mauelshagen
  1 sibling, 0 replies; 51+ messages in thread
From: Heinz Mauelshagen @ 2011-09-28  9:51 UTC (permalink / raw)
  To: device-mapper development; +Cc: Edward Thornber

On Tue, 2011-09-27 at 13:20 -0400, Mikulas Patocka wrote:
> Hi
> 
> I placed updated dm-bufio here: 
> http://people.redhat.com/mpatocka/patches/kernel/dm-thinp-bufio/
> 
> It uses kmem_cache with proper alignment instead of kmalloc (which may 
> return unaligned memory), so it should fix problems that Heinz observed on 
> Sparc.

Thanks Mikulas,

I'll test once Joe has merged it.

Heinz

> 
> Mikulas
> 
> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: dm-bufio
  2011-09-27 17:20 dm-bufio Mikulas Patocka
@ 2011-09-28  9:41 ` Joe Thornber
  2011-09-28  9:51 ` dm-bufio Heinz Mauelshagen
  1 sibling, 0 replies; 51+ messages in thread
From: Joe Thornber @ 2011-09-28  9:41 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: Heinz Mauelshagen, dm-devel

On Tue, Sep 27, 2011 at 01:20:49PM -0400, Mikulas Patocka wrote:
> Hi
> 
> I placed updated dm-bufio here: 
> http://people.redhat.com/mpatocka/patches/kernel/dm-thinp-bufio/
> 
> It uses kmem_cache with proper alignment instead of kmalloc (which may 
> return unaligned memory), so it should fix problems that Heinz observed on 
> Sparc.

Thanks Mikulas,  I'll merge straight away.

- Joe

^ permalink raw reply	[flat|nested] 51+ messages in thread

* dm-bufio
@ 2011-09-27 17:20 Mikulas Patocka
  2011-09-28  9:41 ` dm-bufio Joe Thornber
  2011-09-28  9:51 ` dm-bufio Heinz Mauelshagen
  0 siblings, 2 replies; 51+ messages in thread
From: Mikulas Patocka @ 2011-09-27 17:20 UTC (permalink / raw)
  To: Edward Thornber, Heinz Mauelshagen; +Cc: dm-devel

Hi

I placed updated dm-bufio here: 
http://people.redhat.com/mpatocka/patches/kernel/dm-thinp-bufio/

It uses kmem_cache with proper alignment instead of kmalloc (which may 
return unaligned memory), so it should fix problems that Heinz observed on 
Sparc.

Mikulas

^ permalink raw reply	[flat|nested] 51+ messages in thread

end of thread, other threads:[~2012-03-27 10:20 UTC | newest]

Thread overview: 51+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-10-14 19:14 [PATCH] dm-bufio Mikulas Patocka
2011-10-17 10:08 ` Joe Thornber
2011-10-17 12:41   ` dm-bufio Mike Snitzer
2011-10-17 14:04   ` [PATCH] dm-bufio Mikulas Patocka
2011-10-17 14:15     ` Joe Thornber
2011-10-17 10:29 ` Joe Thornber
2011-10-17 10:39 ` Joe Thornber
2011-10-17 10:43 ` Joe Thornber
2011-10-17 10:54 ` Joe Thornber
2011-10-17 13:47   ` Mikulas Patocka
2011-10-17 13:57     ` Joe Thornber
2011-10-17 10:57 ` Joe Thornber
2011-10-17 11:11 ` Joe Thornber
2011-10-17 14:05   ` Mikulas Patocka
2011-10-17 14:09     ` Joe Thornber
2011-10-17 16:22       ` Mikulas Patocka
2011-10-17 16:41         ` Joe Thornber
2011-10-17 18:30         ` Joe Thornber
2011-10-17 11:14 ` Joe Thornber
2011-10-17 13:43   ` Mikulas Patocka
2011-10-17 11:29 ` Joe Thornber
2011-10-17 16:24   ` Mikulas Patocka
2011-10-17 16:43     ` Joe Thornber
  -- strict thread matches above, loose matches on Subject: below --
2012-03-23 11:01 dm-bufio Kasatkin, Dmitry
2012-03-23 11:10 ` dm-bufio Zdenek Kabelac
2012-03-23 11:26   ` dm-bufio Kasatkin, Dmitry
2012-03-23 14:12     ` dm-bufio Kasatkin, Dmitry
2012-03-23 14:21       ` dm-bufio Mike Snitzer
2012-03-23 14:29         ` dm-bufio Kasatkin, Dmitry
2012-03-23 14:32           ` dm-bufio Mike Snitzer
2012-03-23 15:17             ` dm-bufio Kasatkin, Dmitry
2012-03-23 21:32               ` dm-bufio Mike Snitzer
2012-03-24  0:04                 ` dm-bufio Kasatkin, Dmitry
2012-03-23 16:22       ` dm-bufio Mikulas Patocka
2012-03-23 23:58         ` dm-bufio Kasatkin, Dmitry
2012-03-24  1:07           ` dm-bufio Mikulas Patocka
2012-03-24 18:51             ` dm-bufio Will Drewry
2012-03-27 10:20               ` dm-bufio Kasatkin, Dmitry
2012-03-27  9:56             ` dm-bufio Kasatkin, Dmitry
2011-10-13 15:05 dm-bufio Mikulas Patocka
2011-10-13 19:40 ` dm-bufio Mike Snitzer
2011-10-13 22:19   ` dm-bufio Mikulas Patocka
2011-10-13 22:31     ` dm-bufio Mike Snitzer
2011-10-14  9:19       ` dm-bufio Joe Thornber
2011-10-14  9:15 ` dm-bufio Joe Thornber
2011-10-14 12:19   ` dm-bufio Joe Thornber
2011-10-17 16:49   ` dm-bufio Mikulas Patocka
2011-10-17 18:01     ` dm-bufio Joe Thornber
2011-09-27 17:20 dm-bufio Mikulas Patocka
2011-09-28  9:41 ` dm-bufio Joe Thornber
2011-09-28  9:51 ` dm-bufio Heinz Mauelshagen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.