* [PATCH] softirq block request completion
@ 2004-10-21  8:23 Jens Axboe
  2004-10-22  0:38 ` Rusty Russell
From: Jens Axboe @ 2004-10-21  8:23 UTC
  To: linux-kernel, mingo

Hi,

I wanted to play with this after the I/O completion latency talks started
some time ago. I got an hour to do so this morning, and this is what I
came up with. It's only lightly tested, but it does boot and survives a
dbench run on both IDE and SCSI.

This is just an RFC, not a call for inclusion (as if the above weren't
enough of a warning :-)
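
The short version: when a driver sees that a request is fully done, it no
longer runs the whole completion with the queue lock held from its
interrupt handler. Instead the request is pushed onto a per-cpu list and
BLOCK_SOFTIRQ finishes it later. From the driver side it looks roughly
like this (hypothetical interrupt handler, just to show the new helpers):

	if (rq_all_done(rq, nr_sectors << 9)) {
		/* fully done - defer the expensive completion to softirq */
		blkdev_dequeue_request(rq);
		end_that_request_completely(rq);
	} else {
		/* partial completion still takes the old inline path */
		end_that_request_first(rq, uptodate, nr_sectors);
	}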

Signed-off-by: Jens Axboe <axboe@suse.de>

===== drivers/block/ll_rw_blk.c 1.274 vs edited =====
--- 1.274/drivers/block/ll_rw_blk.c	2004-10-20 10:37:07 +02:00
+++ edited/drivers/block/ll_rw_blk.c	2004-10-21 10:21:57 +02:00
@@ -28,6 +28,8 @@
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
+#include <linux/interrupt.h>
+#include <linux/hash.h>
 
 /*
  * for max sense size
@@ -60,13 +62,27 @@
 /*
  * Controlling structure to kblockd
  */
-static struct workqueue_struct *kblockd_workqueue; 
+static struct workqueue_struct *kblockd_workqueue;
 
 unsigned long blk_max_low_pfn, blk_max_pfn;
 
 EXPORT_SYMBOL(blk_max_low_pfn);
 EXPORT_SYMBOL(blk_max_pfn);
 
+/*
+ * per-cpu completion stuff. try to amortize the cost of getting the
+ * queue_lock by collecting queues in different lists. this should be related
+ * to NR_CPUS as well, no point in making BLK_NR_COMP_QUEUES bigger than the
+ * number of cpus in the system.
+ */
+#define BLK_NR_COMP_SHIFT	2
+#define BLK_NR_COMP_QUEUES	(1 << BLK_NR_COMP_SHIFT)
+
+struct blk_completion_queue {
+	struct list_head done_list[BLK_NR_COMP_QUEUES];
+};
+static DEFINE_PER_CPU(struct blk_completion_queue, blk_cpu_done);
+
 /* Amount of time in which a process may batch requests */
 #define BLK_BATCH_TIME	(HZ/50UL)
 
@@ -2968,6 +2984,113 @@
 EXPORT_SYMBOL(end_that_request_chunk);
 
 /*
+ * end io completely on this list of requests. operation is split into two
+ * parts - one that can run without the queue lock held, and one which
+ * requires it.
+ */
+#define list_entry_done(entry) list_entry((entry), struct request, donelist)
+static void process_completion_queue(struct list_head *list)
+{
+	LIST_HEAD(local_list);
+	request_queue_t *q;
+	static int max;
+	int i = 0;
+
+	while (!list_empty(list)) {
+		struct request *rq = list_entry_done(list->next);
+		unsigned int bytes_done;
+		int uptodate;
+
+		list_move(&rq->donelist, &local_list);
+		i++;
+
+		/*
+		 * fs or pc request
+		 */
+		if (blk_fs_request(rq))
+			bytes_done = rq->hard_nr_sectors << 9;
+		else
+			bytes_done = rq->data_len;
+
+		uptodate = 1;
+		if (rq->errors)
+			uptodate = rq->errors;
+
+		end_that_request_chunk(rq, uptodate, bytes_done);
+		add_disk_randomness(rq->rq_disk);
+	}
+
+	q = NULL;
+	list = &local_list;
+	while (!list_empty(list)) {
+		struct request *rq = list_entry_done(list->next);
+
+		list_del_init(&rq->donelist);
+
+		if (q != rq->q) {
+			if (q)
+				spin_unlock_irq(q->queue_lock);
+
+			q = rq->q;
+			spin_lock_irq(q->queue_lock);
+		}
+
+		if (blk_rq_tagged(rq))
+			blk_queue_end_tag(q, rq);
+
+		end_that_request_last(rq);
+	}
+
+	if (q)
+		spin_unlock_irq(q->queue_lock);
+}
+
+/*
+ * splice the completion data to a local structure and hand off to
+ * process_completion_queue() to complete the requests
+ */
+static void blk_done_softirq(struct softirq_action *h)
+{
+	struct blk_completion_queue local_bcq;
+	struct blk_completion_queue *bcq;
+	int i;
+
+	for (i = 0; i < BLK_NR_COMP_QUEUES; i++)
+		INIT_LIST_HEAD(&local_bcq.done_list[i]);
+
+	local_irq_disable();
+	bcq = &__get_cpu_var(blk_cpu_done);
+
+	for (i = 0; i < BLK_NR_COMP_QUEUES; i++)
+		list_splice_init(&bcq->done_list[i], &local_bcq.done_list[i]);
+
+	local_irq_enable();
+
+	for (i = 0; i < BLK_NR_COMP_QUEUES; i++)
+		process_completion_queue(&local_bcq.done_list[i]);
+}
+
+void end_that_request_completely(struct request *req)
+{
+	long index = hash_ptr(req->q, BLK_NR_COMP_SHIFT);
+	struct blk_completion_queue *bcq;
+	unsigned long flags;
+
+	INIT_LIST_HEAD(&req->donelist);
+	BUG_ON(!blk_pc_request(req) && !blk_fs_request(req));
+
+	local_irq_save(flags);
+
+	bcq = &__get_cpu_var(blk_cpu_done);
+	list_add_tail(&req->donelist, &bcq->done_list[index]);
+	raise_softirq_irqoff(BLOCK_SOFTIRQ);
+
+	local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(end_that_request_completely);
+
+/*
  * queue lock must be held
  */
 void end_that_request_last(struct request *req)
@@ -3064,6 +3187,8 @@
 
 int __init blk_dev_init(void)
 {
+	int i, j;
+
 	kblockd_workqueue = create_workqueue("kblockd");
 	if (!kblockd_workqueue)
 		panic("Failed to create kblockd\n");
@@ -3076,6 +3201,15 @@
 
 	iocontext_cachep = kmem_cache_create("blkdev_ioc",
 			sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);
+
+	for (i = 0; i < NR_CPUS; i++) {
+		struct blk_completion_queue *bcq = &per_cpu(blk_cpu_done, i);
+
+		for (j = 0; j < BLK_NR_COMP_QUEUES; j++)
+			INIT_LIST_HEAD(&bcq->done_list[j]);
+	}
+
+	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
 
 	blk_max_low_pfn = max_low_pfn;
 	blk_max_pfn = max_pfn;
===== drivers/ide/ide-io.c 1.32 vs edited =====
--- 1.32/drivers/ide/ide-io.c	2004-10-20 18:52:21 +02:00
+++ edited/drivers/ide/ide-io.c	2004-10-21 09:02:12 +02:00
@@ -135,6 +135,12 @@
 		HWGROUP(drive)->hwif->ide_dma_on(drive);
 	}
 
+	if (rq_all_done(rq, nr_sectors << 9)) {
+		blkdev_dequeue_request(rq);
+		end_that_request_completely(rq);
+		goto next;
+	}
+
 	if (!end_that_request_first(rq, uptodate, nr_sectors)) {
 		add_disk_randomness(rq->rq_disk);
 
@@ -142,8 +148,9 @@
 			blk_queue_end_tag(drive->queue, rq);
 
 		blkdev_dequeue_request(rq);
-		HWGROUP(drive)->rq = NULL;
 		end_that_request_last(rq);
+next:
+		HWGROUP(drive)->rq = NULL;
 		ret = 0;
 	}
 	return ret;
===== drivers/scsi/scsi_lib.c 1.137 vs edited =====
--- 1.137/drivers/scsi/scsi_lib.c	2004-10-19 08:06:58 +02:00
+++ edited/drivers/scsi/scsi_lib.c	2004-10-21 08:45:15 +02:00
@@ -522,6 +522,11 @@
 	struct request *req = cmd->request;
 	unsigned long flags;
 
+	if (rq_all_done(req, bytes)) {
+		end_that_request_completely(req);
+		goto next;
+	}
+
 	/*
 	 * If there are blocks left over at the end, set up the command
 	 * to queue the remainder of them.
@@ -560,6 +565,7 @@
 	 * This will goose the queue request function at the end, so we don't
 	 * need to worry about launching another command.
 	 */
+next:
 	scsi_next_command(cmd);
 	return NULL;
 }
===== include/linux/blkdev.h 1.154 vs edited =====
--- 1.154/include/linux/blkdev.h	2004-10-19 11:40:18 +02:00
+++ edited/include/linux/blkdev.h	2004-10-21 10:06:44 +02:00
@@ -106,9 +106,8 @@
  * try to put the fields that are referenced together in the same cacheline
  */
 struct request {
-	struct list_head queuelist; /* looking for ->queue? you must _not_
-				     * access it directly, use
-				     * blkdev_dequeue_request! */
+	struct list_head queuelist;
+	struct list_head donelist;
 	unsigned long flags;		/* see REQ_ bits below */
 
 	/* Maintain bio traversal state for part by part I/O submission.
@@ -591,6 +590,16 @@
 extern void end_that_request_last(struct request *);
 extern int process_that_request_first(struct request *, unsigned int);
 extern void end_request(struct request *req, int uptodate);
+extern void end_that_request_completely(struct request *);
+
+static inline int rq_all_done(struct request *rq, unsigned int nr_bytes)
+{
+	if ((blk_fs_request(rq) && nr_bytes >= (rq->hard_nr_sectors << 9)) ||
+	    (blk_pc_request(rq) && nr_bytes >= rq->data_len))
+		return 1;
+
+	return 0;
+}
 
 /*
  * end_that_request_first/chunk() takes an uptodate argument. we account
===== include/linux/interrupt.h 1.31 vs edited =====
--- 1.31/include/linux/interrupt.h	2004-10-19 07:26:39 +02:00
+++ edited/include/linux/interrupt.h	2004-10-21 08:45:15 +02:00
@@ -89,6 +89,7 @@
 	NET_TX_SOFTIRQ,
 	NET_RX_SOFTIRQ,
 	SCSI_SOFTIRQ,
+	BLOCK_SOFTIRQ,
 	TASKLET_SOFTIRQ
 };
 

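Some notes on the design: the softirq handler completes requests in two
passes. The first pass runs end_that_request_chunk() with no queue lock
held at all; the second pass walks the requests again and only drops and
retakes ->queue_lock when the queue actually changes, so a burst of
completions for the same queue pays for a single lock round trip. The
per-cpu hashing by queue pointer is what makes that batching likely to
pay off, since each done list tends to collect requests for few queues:

	/* with BLK_NR_COMP_SHIFT == 2, queues spread over 4 done lists */
	long index = hash_ptr(req->q, BLK_NR_COMP_SHIFT);
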
-- 
Jens Axboe



* Re: [PATCH] softirq block request completion
From: Rusty Russell @ 2004-10-22  0:38 UTC
  To: Jens Axboe; +Cc: lkml - Kernel Mailing List, Ingo Molnar

On Thu, 2004-10-21 at 18:23, Jens Axboe wrote:
> +	for (i = 0; i < NR_CPUS; i++) {
> +		struct blk_completion_queue *bcq = &per_cpu(blk_cpu_done, i);

for_each_cpu(i) perhaps?

Not that it matters, but I'm trying to get people out of bad habits 8)
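
Something like this (untested):

	for_each_cpu(i) {
		struct blk_completion_queue *bcq = &per_cpu(blk_cpu_done, i);

		for (j = 0; j < BLK_NR_COMP_QUEUES; j++)
			INIT_LIST_HEAD(&bcq->done_list[j]);
	}

for_each_cpu() only walks the cpus that can actually exist, instead of
all NR_CPUS slots.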

Thanks,
Rusty.
-- 
Anyone who quotes me in their signature is an idiot -- Rusty Russell


