# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#	ChangeSet	1.607 -> 1.608
#	include/linux/elevator.h	1.11 -> 1.12
#	drivers/block/ll_rw_blk.c	1.109 -> 1.110
#	drivers/block/Makefile	1.9 -> 1.10
#	(new)	-> 1.1	drivers/block/deadline-iosched.c
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/09/25	axboe@burns.home.kernel.dk	1.608
# deadline io scheduler
# --------------------------------------------
#
diff -Nru a/drivers/block/Makefile b/drivers/block/Makefile
--- a/drivers/block/Makefile	Wed Sep 25 19:16:26 2002
+++ b/drivers/block/Makefile	Wed Sep 25 19:16:26 2002
@@ -9,9 +9,9 @@
 #
 
 export-objs	:= elevator.o ll_rw_blk.o loop.o genhd.o acsi.o \
-		   block_ioctl.o
+		   block_ioctl.o deadline-iosched.o
 
-obj-y	:= elevator.o ll_rw_blk.o blkpg.o genhd.o block_ioctl.o
+obj-y	:= elevator.o ll_rw_blk.o blkpg.o genhd.o block_ioctl.o deadline-iosched.o
 
 obj-$(CONFIG_MAC_FLOPPY)	+= swim3.o
 obj-$(CONFIG_BLK_DEV_FD)	+= floppy.o
diff -Nru a/drivers/block/deadline-iosched.c b/drivers/block/deadline-iosched.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/drivers/block/deadline-iosched.c	Wed Sep 25 19:16:26 2002
@@ -0,0 +1,557 @@
+/*
+ * linux/drivers/block/deadline-iosched.c
+ *
+ * Deadline i/o scheduler.
+ *
+ * Copyright (C) 2002 Jens Axboe
+ */
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/elevator.h>
+#include <linux/bio.h>
+#include <linux/blk.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/compiler.h>
+#include <linux/hash.h>
+
+/*
+ * feel free to try other values :-). read_expire is the timeout for
+ * reads; our goal is to start a request "around" the time it expires.
+ * fifo_batch is how many steps along the sorted list we take when the
+ * front fifo request expires.
+ */
+static int read_expire = HZ / 2;	/* 500ms start timeout */
+static int fifo_batch = 64;		/* 4 seeks, or 64 contig */
+static int seek_cost = 16;		/* seek is 16 times more expensive */
+
+/*
+ * how many times reads are allowed to starve writes
+ */
+static int writes_starved = 2;
+
+static const int deadline_hash_shift = 8;
+#define DL_HASH_BLOCK(sec)	((sec) >> 3)
+#define DL_HASH_FN(sec)		(hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift))
+#define DL_HASH_ENTRIES		(1 << deadline_hash_shift)
+
+#define DL_INVALIDATE_HASH(dd)				\
+	do {						\
+		if (!++(dd)->hash_valid_count)		\
+			(dd)->hash_valid_count = 1;	\
+	} while (0)
+
+struct deadline_data {
+	/*
+	 * run time data
+	 */
+	struct list_head sort_list[2];	/* sorted lists */
+	struct list_head read_fifo;	/* fifo list */
+	struct list_head *dispatch;	/* driver dispatch queue */
+	struct list_head *hash;		/* request hash */
+	sector_t last_sector;		/* last sector sent to drive */
+	unsigned long hash_valid_count;	/* barrier hash count */
+	unsigned int starved;		/* writes starved */
+
+	/*
+	 * settings that change how the i/o scheduler behaves
+	 */
+	unsigned int fifo_batch;
+	unsigned long read_expire;
+	unsigned int seek_cost;
+	unsigned int writes_starved;
+};
+
+/*
+ * per-request data.
+ */
+struct deadline_rq {
+	struct list_head fifo;
+	struct list_head hash;
+	unsigned long hash_valid_count;
+	struct request *request;
+	unsigned long expires;
+};
+
+static kmem_cache_t *drq_pool;
+
+#define RQ_DATA(rq)	((struct deadline_rq *) (rq)->elevator_private)
+
+/*
+ * rq hash
+ */
+static inline void __deadline_del_rq_hash(struct deadline_rq *drq)
+{
+	drq->hash_valid_count = 0;
+	list_del_init(&drq->hash);
+}
+
+#define ON_HASH(drq)	(drq)->hash_valid_count
+static inline void deadline_del_rq_hash(struct deadline_rq *drq)
+{
+	if (ON_HASH(drq))
+		__deadline_del_rq_hash(drq);
+}
+
+static inline void
+deadline_add_rq_hash(struct deadline_data *dd, struct deadline_rq *drq)
+{
+	struct request *rq = drq->request;
+
+	BUG_ON(ON_HASH(drq));
+
+	drq->hash_valid_count = dd->hash_valid_count;
+	list_add(&drq->hash, &dd->hash[DL_HASH_FN(rq->sector + rq->nr_sectors)]);
+}
+
+#define list_entry_hash(ptr)	list_entry((ptr), struct deadline_rq, hash)
+static struct request *
+deadline_find_hash(struct deadline_data *dd, sector_t offset)
+{
+	struct list_head *hash_list = &dd->hash[DL_HASH_FN(offset)];
+	struct list_head *entry, *next = hash_list->next;
+	struct deadline_rq *drq;
+	struct request *rq = NULL;
+
+	while ((entry = next) != hash_list) {
+		next = entry->next;
+		drq = list_entry_hash(entry);
+
+		BUG_ON(!drq->hash_valid_count);
+
+		if (!rq_mergeable(drq->request)
+		    || drq->hash_valid_count != dd->hash_valid_count) {
+			__deadline_del_rq_hash(drq);
+			continue;
+		}
+
+		if (drq->request->sector + drq->request->nr_sectors == offset) {
+			rq = drq->request;
+			break;
+		}
+	}
+
+	return rq;
+}
+
+static int
+deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
+{
+	struct deadline_data *dd = q->elevator.elevator_data;
+	const int data_dir = bio_data_dir(bio);
+	struct list_head *entry, *sort_list;
+	struct deadline_rq *drq;
+	struct request *__rq;
+	int ret = ELEVATOR_NO_MERGE;
+
+	/*
+	 * try last_merge to avoid going to hash
+	 */
+	ret = elv_try_last_merge(q, req, bio);
+	if (ret != ELEVATOR_NO_MERGE)
+		goto out;
+
+	/*
+	 * see if the merge hash can satisfy a back merge
+	 */
+	if ((__rq = deadline_find_hash(dd, bio->bi_sector))) {
+		BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
+
+		if (elv_rq_merge_ok(__rq, bio)) {
+			*req = __rq;
+			q->last_merge = &__rq->queuelist;
+			ret = ELEVATOR_BACK_MERGE;
+			goto out_ret;
+		}
+	}
+
+	entry = sort_list = &dd->sort_list[data_dir];
+	while ((entry = entry->prev) != sort_list) {
+		__rq = list_entry_rq(entry);
+		drq = RQ_DATA(__rq);
+
+		BUG_ON(__rq->flags & REQ_STARTED);
+
+		if (!(__rq->flags & REQ_CMD))
+			continue;
+
+		if (!*req && bio_rq_in_between(bio, __rq, sort_list))
+			*req = __rq;
+
+		if (__rq->flags & REQ_BARRIER)
+			break;
+
+		/*
+		 * checking for a front merge, the hash will miss those
+		 */
+		if (__rq->sector - bio_sectors(bio) == bio->bi_sector) {
+			ret = elv_try_merge(__rq, bio);
+			if (ret != ELEVATOR_NO_MERGE) {
+				*req = __rq;
+				q->last_merge = &__rq->queuelist;
+				break;
+			}
+		}
+	}
+
+out:
+	if (ret != ELEVATOR_NO_MERGE) {
+		struct deadline_rq *drq = RQ_DATA(*req);
+
+		deadline_del_rq_hash(drq);
+		deadline_add_rq_hash(dd, drq);
+	}
+out_ret:
+	return ret;
+}
+
+static void
+deadline_merge_request(request_queue_t *q, struct request *req, struct request *next)
+{
+	struct deadline_data *dd = q->elevator.elevator_data;
+	struct deadline_rq *drq = RQ_DATA(req);
+	struct deadline_rq *dnext = RQ_DATA(next);
+
+	BUG_ON(!drq);
+	BUG_ON(!dnext);
+
+	deadline_del_rq_hash(drq);
+	deadline_add_rq_hash(dd, drq);
+
+	/*
+	 * if dnext expires before drq, assign its expire time to drq
+	 * and move drq into dnext's position in the fifo (dnext will be
+	 * deleted)
+	 */
+	if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) {
+		if (time_before(dnext->expires, drq->expires)) {
+			list_move(&drq->fifo, &dnext->fifo);
+			drq->expires = dnext->expires;
+		}
+	}
+}
+
+/*
+ * move request from sort list to dispatch queue. maybe remove from rq hash
+ * here too?
+ */
+static inline void
+deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
+{
+	struct deadline_rq *drq = RQ_DATA(rq);
+
+	list_move_tail(&rq->queuelist, dd->dispatch);
+	list_del_init(&drq->fifo);
+}
+
+/*
+ * move along the sort list and move entries to the dispatch queue,
+ * starting from rq
+ */
+static void deadline_move_requests(struct deadline_data *dd, struct request *rq)
+{
+	struct list_head *sort_head = &dd->sort_list[rq_data_dir(rq)];
+	sector_t last_sec = dd->last_sector;
+	int batch_count = dd->fifo_batch;
+
+	do {
+		struct list_head *nxt = rq->queuelist.next;
+
+		/*
+		 * take it off the sort and fifo list, move
+		 * to dispatch queue
+		 */
+		deadline_move_to_dispatch(dd, rq);
+
+		if (rq->sector == last_sec)
+			batch_count--;
+		else
+			batch_count -= dd->seek_cost;
+
+		if (nxt == sort_head)
+			break;
+
+		last_sec = rq->sector + rq->nr_sectors;
+		rq = list_entry_rq(nxt);
+	} while (batch_count > 0);
+}
+
+/*
+ * returns 0 if there are no expired reads on the fifo, 1 otherwise
+ */
+#define list_entry_fifo(ptr)	list_entry((ptr), struct deadline_rq, fifo)
+static inline int deadline_check_fifo(struct deadline_data *dd)
+{
+	struct deadline_rq *drq;
+
+	if (list_empty(&dd->read_fifo))
+		return 0;
+
+	drq = list_entry_fifo(dd->read_fifo.next);
+	if (time_before(jiffies, drq->expires))
+		return 0;
+
+	return 1;
+}
+
+static struct request *deadline_next_request(request_queue_t *q)
+{
+	struct deadline_data *dd = q->elevator.elevator_data;
+	struct deadline_rq *drq;
+	struct list_head *nxt;
+	struct request *rq;
+	int writes;
+
+	/*
+	 * if there are still requests on the dispatch queue, just grab
+	 * the first one
+	 */
+	if (!list_empty(&q->queue_head)) {
+dispatch:
+		rq = list_entry_rq(q->queue_head.next);
+		dd->last_sector = rq->sector + rq->nr_sectors;
+		return rq;
+	}
+
+	writes = !list_empty(&dd->sort_list[WRITE]);
+
+	/*
+	 * if we have expired entries on the fifo list, move some to dispatch
+	 */
+	if (deadline_check_fifo(dd)) {
+		if (writes && (dd->starved++ >= dd->writes_starved))
+			goto dispatch_writes;
+
+		nxt = dd->read_fifo.next;
+		drq = list_entry_fifo(nxt);
+		deadline_move_requests(dd, drq->request);
+		goto dispatch;
+	}
+
+	if (!list_empty(&dd->sort_list[READ])) {
+		if (writes && (dd->starved++ >= dd->writes_starved))
+			goto dispatch_writes;
+
+		nxt = dd->sort_list[READ].next;
+		deadline_move_requests(dd, list_entry_rq(nxt));
+		goto dispatch;
+	}
+
+	/*
+	 * either there are no expired reads and none on the sort list, or
+	 * the reads have starved writes for too long. dispatch some writes
+	 */
+	if (writes) {
+dispatch_writes:
+		nxt = dd->sort_list[WRITE].next;
+		deadline_move_requests(dd, list_entry_rq(nxt));
+		dd->starved = 0;
+		goto dispatch;
+	}
+
+	BUG_ON(!list_empty(&dd->sort_list[READ]));
+	BUG_ON(writes);
+	return NULL;
+}
+
+static void
+deadline_add_request(request_queue_t *q, struct request *rq, struct list_head *insert_here)
+{
+	struct deadline_data *dd = q->elevator.elevator_data;
+	struct deadline_rq *drq = RQ_DATA(rq);
+	const int data_dir = rq_data_dir(rq);
+
+	/*
+	 * flush the hash on barrier insert, so as not to allow merges
+	 * before a barrier.
+	 */
+	if (unlikely(rq->flags & REQ_BARRIER)) {
+		DL_INVALIDATE_HASH(dd);
+		q->last_merge = NULL;
+	}
+
+	/*
+	 * add to sort list
+	 */
+	if (!insert_here)
+		insert_here = dd->sort_list[data_dir].prev;
+
+	list_add(&rq->queuelist, insert_here);
+
+	if (unlikely(!(rq->flags & REQ_CMD)))
+		return;
+
+	if (rq_mergeable(rq)) {
+		deadline_add_rq_hash(dd, drq);
+
+		if (!q->last_merge)
+			q->last_merge = &rq->queuelist;
+	}
+
+	if (data_dir == READ) {
+		/*
+		 * set expire time and add to fifo list
+		 */
+		drq->expires = jiffies + dd->read_expire;
+		list_add_tail(&drq->fifo, &dd->read_fifo);
+	}
+}
+
+static void deadline_remove_request(request_queue_t *q, struct request *rq)
+{
+	struct deadline_rq *drq = RQ_DATA(rq);
+
+	if (drq) {
+		list_del_init(&drq->fifo);
+		deadline_del_rq_hash(drq);
+	}
+}
+
+static int deadline_queue_empty(request_queue_t *q)
+{
+	struct deadline_data *dd = q->elevator.elevator_data;
+
+	if (!list_empty(&q->queue_head) || !list_empty(&dd->sort_list[READ])
+	    || !list_empty(&dd->sort_list[WRITE]))
+		return 0;
+
+	BUG_ON(!list_empty(&dd->read_fifo));
+	return 1;
+}
+
+static struct list_head *
+deadline_get_sort_head(request_queue_t *q, struct request *rq)
+{
+	struct deadline_data *dd = q->elevator.elevator_data;
+
+	return &dd->sort_list[rq_data_dir(rq)];
+}
+
+static void deadline_exit(request_queue_t *q, elevator_t *e)
+{
+	struct deadline_data *dd = e->elevator_data;
+	struct deadline_rq *drq;
+	struct request *rq;
+	int i;
+
+	BUG_ON(!list_empty(&dd->read_fifo));
+	BUG_ON(!list_empty(&dd->sort_list[READ]));
+	BUG_ON(!list_empty(&dd->sort_list[WRITE]));
+
+	for (i = READ; i <= WRITE; i++) {
+		struct request_list *rl = &q->rq[i];
+		struct list_head *entry = &rl->free;
+
+		if (list_empty(&rl->free))
+			continue;
+
+		while ((entry = entry->next) != &rl->free) {
+			rq = list_entry_rq(entry);
+
+			if ((drq = RQ_DATA(rq)) == NULL)
+				continue;
+
+			rq->elevator_private = NULL;
+			kmem_cache_free(drq_pool, drq);
+		}
+	}
+
+	kfree(dd->hash);
+	kfree(dd);
+}
+
+/*
+ * initialize elevator private data (deadline_data), and alloc a drq for
+ * each request on the free lists
+ */
+static int deadline_init(request_queue_t *q, elevator_t *e)
+{
+	struct deadline_data *dd;
+	struct deadline_rq *drq;
+	struct request *rq;
+	int i, ret = 0;
+
+	if (!drq_pool)
+		return -ENOMEM;
+
+	dd = kmalloc(sizeof(*dd), GFP_KERNEL);
+	if (!dd)
+		return -ENOMEM;
+	memset(dd, 0, sizeof(*dd));
+
+	dd->hash = kmalloc(sizeof(struct list_head) * DL_HASH_ENTRIES, GFP_KERNEL);
+	if (!dd->hash) {
+		kfree(dd);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < DL_HASH_ENTRIES; i++)
+		INIT_LIST_HEAD(&dd->hash[i]);
+
+	INIT_LIST_HEAD(&dd->read_fifo);
+	INIT_LIST_HEAD(&dd->sort_list[READ]);
+	INIT_LIST_HEAD(&dd->sort_list[WRITE]);
+	dd->dispatch = &q->queue_head;
+	dd->fifo_batch = fifo_batch;
+	dd->read_expire = read_expire;
+	dd->seek_cost = seek_cost;
+	dd->hash_valid_count = 1;
+	dd->writes_starved = writes_starved;
+	e->elevator_data = dd;
+
+	for (i = READ; i <= WRITE; i++) {
+		struct request_list *rl = &q->rq[i];
+		struct list_head *entry = &rl->free;
+
+		if (list_empty(&rl->free))
+			continue;
+
+		while ((entry = entry->next) != &rl->free) {
+			rq = list_entry_rq(entry);
+
+			drq = kmem_cache_alloc(drq_pool, GFP_KERNEL);
+			if (!drq) {
+				ret = -ENOMEM;
+				break;
+			}
+
+			memset(drq, 0, sizeof(*drq));
+			INIT_LIST_HEAD(&drq->fifo);
+			INIT_LIST_HEAD(&drq->hash);
+			drq->request = rq;
+			rq->elevator_private = drq;
+		}
+	}
+
+	if (ret)
+		deadline_exit(q, e);
+
+	return ret;
+}
+
+static int __init deadline_slab_setup(void)
+{
+	drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
+				     0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+	if (!drq_pool)
+		panic("deadline: can't init slab pool\n");
+
+	return 0;
+}
+
+module_init(deadline_slab_setup);
+
+elevator_t iosched_deadline = {
+	.elevator_merge_fn =		deadline_merge,
+	.elevator_merge_req_fn =	deadline_merge_request,
+	.elevator_next_req_fn =		deadline_next_request,
+	.elevator_add_req_fn =		deadline_add_request,
+	.elevator_remove_req_fn =	deadline_remove_request,
+	.elevator_queue_empty_fn =	deadline_queue_empty,
+	.elevator_get_sort_head_fn =	deadline_get_sort_head,
+	.elevator_init_fn =		deadline_init,
+	.elevator_exit_fn =		deadline_exit,
+};
+
+EXPORT_SYMBOL(iosched_deadline);
diff -Nru a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
--- a/drivers/block/ll_rw_blk.c	Wed Sep 25 19:16:26 2002
+++ b/drivers/block/ll_rw_blk.c	Wed Sep 25 19:16:26 2002
@@ -1175,7 +1175,7 @@
 	if (blk_init_free_list(q))
 		return -ENOMEM;
 
-	if ((ret = elevator_init(q, &q->elevator, elevator_linus))) {
+	if ((ret = elevator_init(q, &q->elevator, iosched_deadline))) {
 		blk_cleanup_queue(q);
 		return ret;
 	}
diff -Nru a/include/linux/elevator.h b/include/linux/elevator.h
--- a/include/linux/elevator.h	Wed Sep 25 19:16:26 2002
+++ b/include/linux/elevator.h	Wed Sep 25 19:16:26 2002
@@ -60,6 +60,12 @@
 #define ELV_LINUS_SEEK_COST	16
 
 /*
+ * deadline i/o scheduler. uses request timeouts to prevent indefinite
+ * starvation
+ */
+extern elevator_t iosched_deadline;
+
+/*
  * use the /proc/iosched interface, all the below is history ->
  */
 typedef struct blkelv_ioctl_arg_s {