linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Time sliced CFQ #2
@ 2004-12-04 10:49 Jens Axboe
  2004-12-04 16:39 ` Jeff Sipek
                   ` (2 more replies)
  0 siblings, 3 replies; 30+ messages in thread
From: Jens Axboe @ 2004-12-04 10:49 UTC (permalink / raw)
  To: Linux Kernel; +Cc: Andrew Morton

Hi,

Second version of the time sliced CFQ. Changes:

- Sync io has a fixed time slice like before, async io has both a time
  based and a request based slice limit. The queue slice is expired when
  one of these limits is reached.

- Fix a bug in invoking the request handler on a plugged queue.

- Drop the ->alloc_limit wakeup stuff, I'm not so sure it's a good idea
  and there are probably wakeup races buried there.

With the async rq slice limit, it behaves perfectly here for me with
readers competing with async writers. The main slice settings for a
queue are:

- slice_sync: How many msec a sync disk slice lasts
- slice_idle: How long a sync slice is allowed to idle
- slice_async: How many msec an async disk slice lasts
- slice_async_rq: How many requests an async disk slice lasts

Interestingly, cfq is now about 10% faster on an fsck than deadline and
AS:

AS:

bart:~ # time fsck.ext2 -fy /dev/hdc1
e2fsck 1.34 (25-Jul-2003)
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Pass 3: Checking directory connectivity
Pass 4: Checking reference counts
Pass 5: Checking group summary information
/dev/hdc1: 36/3753600 files (8.3% non-contiguous), 644713/7504552 blocks

real    0m30.594s
user    0m1.862s
sys     0m5.214s


DEADLINE:

bart:~ # time fsck.ext2 -fy /dev/hdc1
e2fsck 1.34 (25-Jul-2003)
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Pass 3: Checking directory connectivity
Pass 4: Checking reference counts
Pass 5: Checking group summary information
/dev/hdc1: 36/3753600 files (8.3% non-contiguous), 644713/7504552 blocks

real    0m30.475s
user    0m1.855s
sys     0m5.280s


CFQ:

bart:~ # time fsck.ext2 -fy /dev/hdc1
e2fsck 1.34 (25-Jul-2003)
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Pass 3: Checking directory connectivity
Pass 4: Checking reference counts
Pass 5: Checking group summary information
/dev/hdc1: 36/3753600 files (8.3% non-contiguous), 644713/7504552 blocks

real    0m27.921s
user    0m1.846s
sys     0m5.648s


Patch is against 2.6.10-rc3.

Signed-off-by: Jens Axboe <axboe@suse.de>

===== drivers/block/cfq-iosched.c 1.15 vs edited =====
--- 1.15/drivers/block/cfq-iosched.c	2004-11-30 07:56:58 +01:00
+++ edited/drivers/block/cfq-iosched.c	2004-12-04 11:41:42 +01:00
@@ -22,21 +22,24 @@
 #include <linux/rbtree.h>
 #include <linux/mempool.h>
 
-static unsigned long max_elapsed_crq;
-static unsigned long max_elapsed_dispatch;
-
 /*
  * tunables
  */
 static int cfq_quantum = 4;		/* max queue in one round of service */
 static int cfq_queued = 8;		/* minimum rq allocate limit per-queue*/
-static int cfq_service = HZ;		/* period over which service is avg */
 static int cfq_fifo_expire_r = HZ / 2;	/* fifo timeout for sync requests */
 static int cfq_fifo_expire_w = 5 * HZ;	/* fifo timeout for async requests */
 static int cfq_fifo_rate = HZ / 8;	/* fifo expiry rate */
 static int cfq_back_max = 16 * 1024;	/* maximum backwards seek, in KiB */
 static int cfq_back_penalty = 2;	/* penalty of a backwards seek */
 
+static int cfq_slice_sync = HZ / 10;
+static int cfq_slice_async = HZ / 25;
+static int cfq_slice_async_rq = 8;
+static int cfq_slice_idle = HZ / 249;
+
+static int cfq_max_depth = 4;
+
 /*
  * for the hash of cfqq inside the cfqd
  */
@@ -55,6 +58,7 @@
 #define list_entry_hash(ptr)	hlist_entry((ptr), struct cfq_rq, hash)
 
 #define list_entry_cfqq(ptr)	list_entry((ptr), struct cfq_queue, cfq_list)
+#define list_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
 
 #define RQ_DATA(rq)		(rq)->elevator_private
 
@@ -76,22 +80,18 @@
 #define rq_rb_key(rq)		(rq)->sector
 
 /*
- * threshold for switching off non-tag accounting
- */
-#define CFQ_MAX_TAG		(4)
-
-/*
  * sort key types and names
  */
 enum {
 	CFQ_KEY_PGID,
 	CFQ_KEY_TGID,
+	CFQ_KEY_PID,
 	CFQ_KEY_UID,
 	CFQ_KEY_GID,
 	CFQ_KEY_LAST,
 };
 
-static char *cfq_key_types[] = { "pgid", "tgid", "uid", "gid", NULL };
+static char *cfq_key_types[] = { "pgid", "tgid", "pid", "uid", "gid", NULL };
 
 /*
  * spare queue
@@ -103,6 +103,8 @@
 static kmem_cache_t *cfq_ioc_pool;
 
 struct cfq_data {
+	atomic_t ref;
+
 	struct list_head rr_list;
 	struct list_head empty_list;
 
@@ -114,8 +116,6 @@
 
 	unsigned int max_queued;
 
-	atomic_t ref;
-
 	int key_type;
 
 	mempool_t *crq_pool;
@@ -127,6 +127,14 @@
 	int rq_in_driver;
 
 	/*
+	 * schedule slice state info
+	 */
+	struct timer_list timer;
+	struct work_struct unplug_work;
+	struct cfq_queue *active_queue;
+	unsigned int dispatch_slice;
+
+	/*
 	 * tunables, see top of file
 	 */
 	unsigned int cfq_quantum;
@@ -137,8 +145,10 @@
 	unsigned int cfq_back_penalty;
 	unsigned int cfq_back_max;
 	unsigned int find_best_crq;
-
-	unsigned int cfq_tagged;
+	unsigned int cfq_slice[2];
+	unsigned int cfq_slice_async_rq;
+	unsigned int cfq_slice_idle;
+	unsigned int cfq_max_depth;
 };
 
 struct cfq_queue {
@@ -150,8 +160,6 @@
 	struct hlist_node cfq_hash;
 	/* hash key */
 	unsigned long key;
-	/* whether queue is on rr (or empty) list */
-	int on_rr;
 	/* on either rr or empty list of cfqd */
 	struct list_head cfq_list;
 	/* sorted list of pending requests */
@@ -169,15 +177,17 @@
 
 	int key_type;
 
-	unsigned long service_start;
-	unsigned long service_used;
+	unsigned long slice_start;
+	unsigned long slice_end;
+	unsigned long service_last;
 
-	unsigned int max_rate;
+	/* whether queue is on rr (or empty) list */
+	unsigned int on_rr : 1;
+	unsigned int wait_request : 1;
+	unsigned int must_dispatch : 1;
 
 	/* number of requests that have been handed to the driver */
 	int in_flight;
-	/* number of currently allocated requests */
-	int alloc_limit[2];
 };
 
 struct cfq_rq {
@@ -195,7 +205,6 @@
 	unsigned int in_flight : 1;
 	unsigned int accounted : 1;
 	unsigned int is_sync   : 1;
-	unsigned int is_write  : 1;
 };
 
 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned long);
@@ -219,6 +228,8 @@
 		default:
 		case CFQ_KEY_TGID:
 			return tsk->tgid;
+		case CFQ_KEY_PID:
+			return tsk->pid;
 		case CFQ_KEY_UID:
 			return tsk->uid;
 		case CFQ_KEY_GID:
@@ -406,67 +417,22 @@
 		cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
 }
 
-static int cfq_check_sort_rr_list(struct cfq_queue *cfqq)
-{
-	struct list_head *head = &cfqq->cfqd->rr_list;
-	struct list_head *next, *prev;
-
-	/*
-	 * list might still be ordered
-	 */
-	next = cfqq->cfq_list.next;
-	if (next != head) {
-		struct cfq_queue *cnext = list_entry_cfqq(next);
-
-		if (cfqq->service_used > cnext->service_used)
-			return 1;
-	}
-
-	prev = cfqq->cfq_list.prev;
-	if (prev != head) {
-		struct cfq_queue *cprev = list_entry_cfqq(prev);
-
-		if (cfqq->service_used < cprev->service_used)
-			return 1;
-	}
-
-	return 0;
-}
-
-static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue)
+static void cfq_resort_rr_list(struct cfq_queue *cfqq)
 {
 	struct list_head *entry = &cfqq->cfqd->rr_list;
 
-	if (!cfqq->on_rr)
-		return;
-	if (!new_queue && !cfq_check_sort_rr_list(cfqq))
-		return;
-
 	list_del(&cfqq->cfq_list);
 
 	/*
-	 * sort by our mean service_used, sub-sort by in-flight requests
+	 * sort by when queue was last serviced
 	 */
 	while ((entry = entry->prev) != &cfqq->cfqd->rr_list) {
 		struct cfq_queue *__cfqq = list_entry_cfqq(entry);
 
-		if (cfqq->service_used > __cfqq->service_used)
+		if (!__cfqq->service_last)
+			break;
+		if (time_before(__cfqq->service_last, cfqq->service_last))
 			break;
-		else if (cfqq->service_used == __cfqq->service_used) {
-			struct list_head *prv;
-
-			while ((prv = entry->prev) != &cfqq->cfqd->rr_list) {
-				__cfqq = list_entry_cfqq(prv);
-
-				WARN_ON(__cfqq->service_used > cfqq->service_used);
-				if (cfqq->service_used != __cfqq->service_used)
-					break;
-				if (cfqq->in_flight > __cfqq->in_flight)
-					break;
-
-				entry = prv;
-			}
-		}
 	}
 
 	list_add(&cfqq->cfq_list, entry);
@@ -479,16 +445,12 @@
 static inline void
 cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	/*
-	 * it's currently on the empty list
-	 */
-	cfqq->on_rr = 1;
-	cfqd->busy_queues++;
+	BUG_ON(cfqq->on_rr);
 
-	if (time_after(jiffies, cfqq->service_start + cfq_service))
-		cfqq->service_used >>= 3;
+	cfqd->busy_queues++;
+	cfqq->on_rr = 1;
 
-	cfq_sort_rr_list(cfqq, 1);
+	cfq_resort_rr_list(cfqq);
 }
 
 static inline void
@@ -512,10 +474,10 @@
 		struct cfq_data *cfqd = cfqq->cfqd;
 
 		BUG_ON(!cfqq->queued[crq->is_sync]);
+		cfqq->queued[crq->is_sync]--;
 
 		cfq_update_next_crq(crq);
 
-		cfqq->queued[crq->is_sync]--;
 		rb_erase(&crq->rb_node, &cfqq->sort_list);
 		RB_CLEAR_COLOR(&crq->rb_node);
 
@@ -622,11 +584,6 @@
 	if (crq) {
 		struct cfq_queue *cfqq = crq->cfq_queue;
 
-		if (cfqq->cfqd->cfq_tagged) {
-			cfqq->service_used--;
-			cfq_sort_rr_list(cfqq, 0);
-		}
-
 		crq->accounted = 0;
 		cfqq->cfqd->rq_in_driver--;
 	}
@@ -640,9 +597,7 @@
 	if (crq) {
 		cfq_remove_merge_hints(q, crq);
 		list_del_init(&rq->queuelist);
-
-		if (crq->cfq_queue)
-			cfq_del_crq_rb(crq);
+		cfq_del_crq_rb(crq);
 	}
 }
 
@@ -724,6 +679,99 @@
 }
 
 /*
+ * current cfqq expired its slice (or was too idle), select new one
+ */
+static inline void cfq_slice_expired(struct cfq_data *cfqd)
+{
+	struct cfq_queue *cfqq = cfqd->active_queue;
+	unsigned long now = jiffies;
+
+	if (cfqq) {
+		if (cfqq->wait_request)
+			del_timer(&cfqd->timer);
+
+		cfqq->service_last = now;
+		cfqq->must_dispatch = 0;
+		cfqq->wait_request = 0;
+
+		if (cfqq->on_rr)
+			cfq_resort_rr_list(cfqq);
+
+		cfqq = NULL;
+	}
+
+	if (!list_empty(&cfqd->rr_list)) {
+		cfqq = list_entry_cfqq(cfqd->rr_list.next);
+
+		cfqq->slice_start = now;
+		cfqq->slice_end = 0;
+		cfqq->wait_request = 0;
+	}
+
+	cfqd->active_queue = cfqq;
+	cfqd->dispatch_slice = 0;
+}
+
+static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	WARN_ON(!RB_EMPTY(&cfqq->sort_list));
+
+	cfqq->wait_request = 1;
+
+	if (!cfqd->cfq_slice_idle)
+		return 0;
+
+	if (!timer_pending(&cfqd->timer)) {
+		unsigned long now = jiffies, slice_left;
+
+		slice_left = cfqq->slice_end - now;
+		cfqd->timer.expires = now + min(cfqd->cfq_slice_idle, (unsigned int)slice_left);
+		add_timer(&cfqd->timer);
+	}
+
+	return 1;
+}
+
+/*
+ * get next queue for service
+ */
+static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
+{
+	struct cfq_queue *cfqq = cfqd->active_queue;
+	unsigned long now = jiffies;
+
+	cfqq = cfqd->active_queue;
+	if (!cfqq)
+		goto new_queue;
+
+	if (cfqq->must_dispatch)
+		goto must_queue;
+
+	/*
+	 * slice has expired
+	 */
+	if (time_after(jiffies, cfqq->slice_end))
+		goto new_queue;
+
+	/*
+	 * if queue has requests, dispatch one. if not, check if
+	 * enough slice is left to wait for one
+	 */
+must_queue:
+	if (!RB_EMPTY(&cfqq->sort_list))
+		goto keep_queue;
+	else if (cfqq->slice_end - now >= cfqd->cfq_slice_idle) {
+		if (cfq_arm_slice_timer(cfqd, cfqq))
+			return NULL;
+	}
+
+new_queue:
+	cfq_slice_expired(cfqd);
+keep_queue:
+	return cfqd->active_queue;
+}
+
+/*
  * we dispatch cfqd->cfq_quantum requests in total from the rr_list queues,
  * this function sector sorts the selected request to minimize seeks. we start
  * at cfqd->last_sector, not 0.
@@ -741,9 +789,7 @@
 	list_del(&crq->request->queuelist);
 
 	last = cfqd->last_sector;
-	while ((entry = entry->prev) != head) {
-		__rq = list_entry_rq(entry);
-
+	list_for_each_entry_reverse(__rq, head, queuelist) {
 		if (blk_barrier_rq(crq->request))
 			break;
 		if (!blk_fs_request(crq->request))
@@ -777,95 +823,100 @@
 	if (time_before(now, cfqq->last_fifo_expire + cfqd->cfq_fifo_batch_expire))
 		return NULL;
 
-	crq = RQ_DATA(list_entry(cfqq->fifo[0].next, struct request, queuelist));
-	if (reads && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) {
-		cfqq->last_fifo_expire = now;
-		return crq;
+	if (reads) {
+		crq = RQ_DATA(list_entry_fifo(cfqq->fifo[READ].next));
+		if (time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) {
+			cfqq->last_fifo_expire = now;
+			return crq;
+		}
 	}
 
-	crq = RQ_DATA(list_entry(cfqq->fifo[1].next, struct request, queuelist));
-	if (writes && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) {
-		cfqq->last_fifo_expire = now;
-		return crq;
+	if (writes) {
+		crq = RQ_DATA(list_entry_fifo(cfqq->fifo[WRITE].next));
+		if (time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) {
+			cfqq->last_fifo_expire = now;
+			return crq;
+		}
 	}
 
 	return NULL;
 }
 
-/*
- * dispatch a single request from given queue
- */
-static inline void
-cfq_dispatch_request(request_queue_t *q, struct cfq_data *cfqd,
-		     struct cfq_queue *cfqq)
+static int
+__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+			int max_dispatch)
 {
-	struct cfq_rq *crq;
+	int dispatched = 0, sync = 0;
+
+	BUG_ON(RB_EMPTY(&cfqq->sort_list));
+
+	do {
+		struct cfq_rq *crq;
+
+		/*
+		 * follow expired path, else get first next available
+		 */
+		if ((crq = cfq_check_fifo(cfqq)) == NULL) {
+			if (cfqd->find_best_crq)
+				crq = cfqq->next_crq;
+			else
+				crq = rb_entry_crq(rb_first(&cfqq->sort_list));
+		}
+
+		cfqd->last_sector = crq->request->sector + crq->request->nr_sectors;
+
+		/*
+		 * finally, insert request into driver list
+		 */
+		cfq_dispatch_sort(cfqd->queue, crq);
+
+		cfqd->dispatch_slice++;
+		dispatched++;
+		sync += crq->is_sync;
+
+		if (RB_EMPTY(&cfqq->sort_list))
+			break;
+
+	} while (dispatched < max_dispatch);
 
 	/*
-	 * follow expired path, else get first next available
+	 * if slice end isn't set yet, set it. if at least one request was
+	 * sync, use the sync time slice value
 	 */
-	if ((crq = cfq_check_fifo(cfqq)) == NULL) {
-		if (cfqd->find_best_crq)
-			crq = cfqq->next_crq;
-		else
-			crq = rb_entry_crq(rb_first(&cfqq->sort_list));
-	}
-
-	cfqd->last_sector = crq->request->sector + crq->request->nr_sectors;
+	if (!cfqq->slice_end)
+		cfqq->slice_end = cfqd->cfq_slice[!!sync] + jiffies;
 
 	/*
-	 * finally, insert request into driver list
+	 * expire an async queue immediately if it has used up its tq slice
 	 */
-	cfq_dispatch_sort(q, crq);
+	if (!sync && cfqd->dispatch_slice >= cfqd->cfq_slice_async_rq)
+		cfq_slice_expired(cfqd);
+
+	return dispatched;
 }
 
 static int cfq_dispatch_requests(request_queue_t *q, int max_dispatch)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq;
-	struct list_head *entry, *tmp;
-	int queued, busy_queues, first_round;
 
 	if (list_empty(&cfqd->rr_list))
 		return 0;
 
-	queued = 0;
-	first_round = 1;
-restart:
-	busy_queues = 0;
-	list_for_each_safe(entry, tmp, &cfqd->rr_list) {
-		cfqq = list_entry_cfqq(entry);
-
-		BUG_ON(RB_EMPTY(&cfqq->sort_list));
-
-		/*
-		 * first round of queueing, only select from queues that
-		 * don't already have io in-flight
-		 */
-		if (first_round && cfqq->in_flight)
-			continue;
-
-		cfq_dispatch_request(q, cfqd, cfqq);
-
-		if (!RB_EMPTY(&cfqq->sort_list))
-			busy_queues++;
-
-		queued++;
-	}
-
-	if ((queued < max_dispatch) && (busy_queues || first_round)) {
-		first_round = 0;
-		goto restart;
-	}
+	cfqq = cfq_select_queue(cfqd);
+	if (!cfqq)
+		return 0;
 
-	return queued;
+	cfqq->wait_request = 0;
+	cfqq->must_dispatch = 0;
+	del_timer(&cfqd->timer);
+	return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
 }
 
 static inline void cfq_account_dispatch(struct cfq_rq *crq)
 {
 	struct cfq_queue *cfqq = crq->cfq_queue;
 	struct cfq_data *cfqd = cfqq->cfqd;
-	unsigned long now, elapsed;
 
 	/*
 	 * accounted bit is necessary since some drivers will call
@@ -874,37 +925,9 @@
 	if (crq->accounted)
 		return;
 
-	now = jiffies;
-	if (cfqq->service_start == ~0UL)
-		cfqq->service_start = now;
-
-	/*
-	 * on drives with tagged command queueing, command turn-around time
-	 * doesn't necessarily reflect the time spent processing this very
-	 * command inside the drive. so do the accounting differently there,
-	 * by just sorting on the number of requests
-	 */
-	if (cfqd->cfq_tagged) {
-		if (time_after(now, cfqq->service_start + cfq_service)) {
-			cfqq->service_start = now;
-			cfqq->service_used /= 10;
-		}
-
-		cfqq->service_used++;
-		cfq_sort_rr_list(cfqq, 0);
-	}
-
-	elapsed = now - crq->queue_start;
-	if (elapsed > max_elapsed_dispatch)
-		max_elapsed_dispatch = elapsed;
-
 	crq->accounted = 1;
-	crq->service_start = now;
-
-	if (++cfqd->rq_in_driver >= CFQ_MAX_TAG && !cfqd->cfq_tagged) {
-		cfqq->cfqd->cfq_tagged = 1;
-		printk("cfq: depth %d reached, tagging now on\n", CFQ_MAX_TAG);
-	}
+	crq->service_start = jiffies;
+	cfqd->rq_in_driver++;
 }
 
 static inline void
@@ -915,21 +938,18 @@
 	WARN_ON(!cfqd->rq_in_driver);
 	cfqd->rq_in_driver--;
 
-	if (!cfqd->cfq_tagged) {
-		unsigned long now = jiffies;
-		unsigned long duration = now - crq->service_start;
-
-		if (time_after(now, cfqq->service_start + cfq_service)) {
-			cfqq->service_start = now;
-			cfqq->service_used >>= 3;
-		}
-
-		cfqq->service_used += duration;
-		cfq_sort_rr_list(cfqq, 0);
+	/*
+	 * queue was preempted while this request was servicing
+	 */
+	if (cfqd->active_queue != cfqq)
+		return;
 
-		if (duration > max_elapsed_crq)
-			max_elapsed_crq = duration;
-	}
+	/*
+	 * no requests. if last request was a sync request, wait for
+	 * a new one.
+	 */
+	if (RB_EMPTY(&cfqq->sort_list) && crq->is_sync)
+		cfq_arm_slice_timer(cfqd, cfqq);
 }
 
 static struct request *cfq_next_request(request_queue_t *q)
@@ -937,6 +957,9 @@
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct request *rq;
 
+	if (cfqd->rq_in_driver >= cfqd->cfq_max_depth)
+		return NULL;
+
 	if (!list_empty(&q->queue_head)) {
 		struct cfq_rq *crq;
 dispatch:
@@ -964,6 +987,8 @@
  */
 static void cfq_put_queue(struct cfq_queue *cfqq)
 {
+	struct cfq_data *cfqd = cfqq->cfqd;
+
 	BUG_ON(!atomic_read(&cfqq->ref));
 
 	if (!atomic_dec_and_test(&cfqq->ref))
@@ -972,6 +997,9 @@
 	BUG_ON(rb_first(&cfqq->sort_list));
 	BUG_ON(cfqq->on_rr);
 
+	if (unlikely(cfqd->active_queue == cfqq))
+		cfqd->active_queue = NULL;
+
 	cfq_put_cfqd(cfqq->cfqd);
 
 	/*
@@ -1117,6 +1145,7 @@
 		cic->ioc = ioc;
 		cic->cfqq = __cfqq;
 		atomic_inc(&__cfqq->ref);
+		atomic_inc(&cfqd->ref);
 	} else {
 		struct cfq_io_context *__cic;
 		unsigned long flags;
@@ -1159,10 +1188,10 @@
 		__cic->ioc = ioc;
 		__cic->cfqq = __cfqq;
 		atomic_inc(&__cfqq->ref);
+		atomic_inc(&cfqd->ref);
 		spin_lock_irqsave(&ioc->lock, flags);
 		list_add(&__cic->list, &cic->list);
 		spin_unlock_irqrestore(&ioc->lock, flags);
-
 		cic = __cic;
 		*cfqq = __cfqq;
 	}
@@ -1199,8 +1228,11 @@
 			new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
 			spin_lock_irq(cfqd->queue->queue_lock);
 			goto retry;
-		} else
-			goto out;
+		} else {
+			cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
+			if (!cfqq)
+				goto out;
+		}
 
 		memset(cfqq, 0, sizeof(*cfqq));
 
@@ -1216,7 +1248,7 @@
 		cfqq->cfqd = cfqd;
 		atomic_inc(&cfqd->ref);
 		cfqq->key_type = cfqd->key_type;
-		cfqq->service_start = ~0UL;
+		cfqq->service_last = 0;
 	}
 
 	if (new_cfqq)
@@ -1243,14 +1275,31 @@
 
 static void cfq_enqueue(struct cfq_data *cfqd, struct cfq_rq *crq)
 {
-	crq->is_sync = 0;
-	if (rq_data_dir(crq->request) == READ || current->flags & PF_SYNCWRITE)
-		crq->is_sync = 1;
+	struct cfq_queue *cfqq = crq->cfq_queue;
+	struct request *rq = crq->request;
+
+	crq->is_sync = rq_data_dir(rq) == READ || current->flags & PF_SYNCWRITE;
 
 	cfq_add_crq_rb(crq);
 	crq->queue_start = jiffies;
 
-	list_add_tail(&crq->request->queuelist, &crq->cfq_queue->fifo[crq->is_sync]);
+	list_add_tail(&rq->queuelist, &cfqq->fifo[crq->is_sync]);
+
+	/*
+	 * if we are waiting for a request for this queue, let it rip
+	 * immediately and flag that we must not expire this queue just now
+	 */
+	if (cfqq->wait_request && cfqq == cfqd->active_queue) {
+		request_queue_t *q = cfqd->queue;
+
+		cfqq->must_dispatch = 1;
+		del_timer(&cfqd->timer);
+
+		if (!blk_queue_plugged(q))
+			q->request_fn(q);
+		else
+			__generic_unplug_device(q);
+	}
 }
 
 static void
@@ -1339,32 +1388,31 @@
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq;
 	int ret = ELV_MQUEUE_MAY;
+	int limit;
 
 	if (current->flags & PF_MEMALLOC)
 		return ELV_MQUEUE_MAY;
 
 	cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(cfqd, current));
-	if (cfqq) {
-		int limit = cfqd->max_queued;
-
-		if (cfqq->allocated[rw] < cfqd->cfq_queued)
-			return ELV_MQUEUE_MUST;
-
-		if (cfqd->busy_queues)
-			limit = q->nr_requests / cfqd->busy_queues;
-
-		if (limit < cfqd->cfq_queued)
-			limit = cfqd->cfq_queued;
-		else if (limit > cfqd->max_queued)
-			limit = cfqd->max_queued;
+	if (unlikely(!cfqq))
+		return ELV_MQUEUE_MAY;
 
-		if (cfqq->allocated[rw] >= limit) {
-			if (limit > cfqq->alloc_limit[rw])
-				cfqq->alloc_limit[rw] = limit;
+	if (cfqq->allocated[rw] < cfqd->cfq_queued)
+		return ELV_MQUEUE_MUST;
+	if (cfqq->wait_request)
+		return ELV_MQUEUE_MUST;
+
+	limit = cfqd->max_queued;
+	if (cfqd->busy_queues)
+		limit = q->nr_requests / cfqd->busy_queues;
+
+	if (limit < cfqd->cfq_queued)
+		limit = cfqd->cfq_queued;
+	else if (limit > cfqd->max_queued)
+		limit = cfqd->max_queued;
 
-			ret = ELV_MQUEUE_NO;
-		}
-	}
+	if (cfqq->allocated[rw] >= limit)
+		ret = ELV_MQUEUE_NO;
 
 	return ret;
 }
@@ -1372,12 +1420,13 @@
 static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq)
 {
 	struct request_list *rl = &q->rq;
-	const int write = waitqueue_active(&rl->wait[WRITE]);
-	const int read = waitqueue_active(&rl->wait[READ]);
+	const int writes = waitqueue_active(&rl->wait[WRITE]);
+	const int reads = waitqueue_active(&rl->wait[READ]);
+	struct cfq_data *cfqd = q->elevator->elevator_data;
 
-	if (read && cfqq->allocated[READ] < cfqq->alloc_limit[READ])
+	if (reads && cfqq->allocated[READ] < cfqd->max_queued)
 		wake_up(&rl->wait[READ]);
-	if (write && cfqq->allocated[WRITE] < cfqq->alloc_limit[WRITE])
+	if (writes && cfqq->allocated[WRITE] < cfqd->max_queued)
 		wake_up(&rl->wait[WRITE]);
 }
 
@@ -1391,16 +1440,17 @@
 
 	if (crq) {
 		struct cfq_queue *cfqq = crq->cfq_queue;
+		const int rw = rq_data_dir(rq);
 
 		BUG_ON(q->last_merge == rq);
 		BUG_ON(!hlist_unhashed(&crq->hash));
 
+		BUG_ON(!cfqq->allocated[rw]);
+		cfqq->allocated[rw]--;
+
 		if (crq->io_context)
 			put_io_context(crq->io_context->ioc);
 
-		BUG_ON(!cfqq->allocated[crq->is_write]);
-		cfqq->allocated[crq->is_write]--;
-
 		mempool_free(crq, cfqd->crq_pool);
 		rq->elevator_private = NULL;
 
@@ -1470,9 +1520,7 @@
 		crq->io_context = cic;
 		crq->service_start = crq->queue_start = 0;
 		crq->in_flight = crq->accounted = crq->is_sync = 0;
-		crq->is_write = rw;
 		rq->elevator_private = crq;
-		cfqq->alloc_limit[rw] = 0;
 		return 0;
 	}
 
@@ -1486,6 +1534,44 @@
 	return 1;
 }
 
+static void cfq_kick_queue(void *data)
+{
+	request_queue_t *q = data;
+
+	blk_run_queue(q);
+}
+
+static void cfq_schedule_timer(unsigned long data)
+{
+	struct cfq_data *cfqd = (struct cfq_data *) data;
+	struct cfq_queue *cfqq;
+	unsigned long flags;
+
+	spin_lock_irqsave(cfqd->queue->queue_lock, flags);
+
+	if ((cfqq = cfqd->active_queue) != NULL) {
+		/*
+		 * expired
+		 */
+		if (time_after(jiffies, cfqq->slice_end))
+			goto out;
+
+		/*
+		 * not expired and it has a request pending, let it dispatch
+		 */
+		if (!RB_EMPTY(&cfqq->sort_list)) {
+			cfqq->must_dispatch = 1;
+			goto out_cont;
+		}
+	} 
+
+out:
+	cfq_slice_expired(cfqd);
+out_cont:
+	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+	kblockd_schedule_work(&cfqd->unplug_work);
+}
+
 static void cfq_put_cfqd(struct cfq_data *cfqd)
 {
 	request_queue_t *q = cfqd->queue;
@@ -1494,6 +1580,8 @@
 	if (!atomic_dec_and_test(&cfqd->ref))
 		return;
 
+	blk_sync_queue(q);
+
 	/*
 	 * kill spare queue, getting it means we have two refences to it.
 	 * drop both
@@ -1565,10 +1653,17 @@
 	 * some requests. fairness is handled differently
 	 */
 	q->nr_requests = 1024;
-	cfqd->max_queued = q->nr_requests / 16;
+	cfqd->max_queued = q->nr_requests / 8;
 	q->nr_batching = cfq_queued;
-	cfqd->key_type = CFQ_KEY_TGID;
+	cfqd->key_type = CFQ_KEY_PID;
 	cfqd->find_best_crq = 1;
+
+	init_timer(&cfqd->timer);
+	cfqd->timer.function = cfq_schedule_timer;
+	cfqd->timer.data = (unsigned long) cfqd;
+
+	INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);
+
 	atomic_set(&cfqd->ref, 1);
 
 	cfqd->cfq_queued = cfq_queued;
@@ -1578,6 +1673,11 @@
 	cfqd->cfq_fifo_batch_expire = cfq_fifo_rate;
 	cfqd->cfq_back_max = cfq_back_max;
 	cfqd->cfq_back_penalty = cfq_back_penalty;
+	cfqd->cfq_slice[0] = cfq_slice_async;
+	cfqd->cfq_slice[1] = cfq_slice_sync;
+	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
+	cfqd->cfq_slice_idle = cfq_slice_idle;
+	cfqd->cfq_max_depth = cfq_max_depth;
 
 	return 0;
 out_spare:
@@ -1624,7 +1724,6 @@
 	return -ENOMEM;
 }
 
-
 /*
  * sysfs parts below -->
  */
@@ -1650,13 +1749,6 @@
 }
 
 static ssize_t
-cfq_clear_elapsed(struct cfq_data *cfqd, const char *page, size_t count)
-{
-	max_elapsed_dispatch = max_elapsed_crq = 0;
-	return count;
-}
-
-static ssize_t
 cfq_set_key_type(struct cfq_data *cfqd, const char *page, size_t count)
 {
 	spin_lock_irq(cfqd->queue->queue_lock);
@@ -1664,6 +1756,8 @@
 		cfqd->key_type = CFQ_KEY_PGID;
 	else if (!strncmp(page, "tgid", 4))
 		cfqd->key_type = CFQ_KEY_TGID;
+	else if (!strncmp(page, "pid", 3))
+		cfqd->key_type = CFQ_KEY_PID;
 	else if (!strncmp(page, "uid", 3))
 		cfqd->key_type = CFQ_KEY_UID;
 	else if (!strncmp(page, "gid", 3))
@@ -1704,6 +1798,11 @@
 SHOW_FUNCTION(cfq_find_best_show, cfqd->find_best_crq, 0);
 SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0);
 SHOW_FUNCTION(cfq_back_penalty_show, cfqd->cfq_back_penalty, 0);
+SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
+SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
+SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
+SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
+SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
@@ -1729,6 +1828,11 @@
 STORE_FUNCTION(cfq_find_best_store, &cfqd->find_best_crq, 0, 1, 0);
 STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
 STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0);
+STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
+STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
+STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
+STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0);
+STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 2, UINT_MAX, 0);
 #undef STORE_FUNCTION
 
 static struct cfq_fs_entry cfq_quantum_entry = {
@@ -1771,15 +1875,36 @@
 	.show = cfq_back_penalty_show,
 	.store = cfq_back_penalty_store,
 };
-static struct cfq_fs_entry cfq_clear_elapsed_entry = {
-	.attr = {.name = "clear_elapsed", .mode = S_IWUSR },
-	.store = cfq_clear_elapsed,
+static struct cfq_fs_entry cfq_slice_sync_entry = {
+	.attr = {.name = "slice_sync", .mode = S_IRUGO | S_IWUSR },
+	.show = cfq_slice_sync_show,
+	.store = cfq_slice_sync_store,
+};
+static struct cfq_fs_entry cfq_slice_async_entry = {
+	.attr = {.name = "slice_async", .mode = S_IRUGO | S_IWUSR },
+	.show = cfq_slice_async_show,
+	.store = cfq_slice_async_store,
+};
+static struct cfq_fs_entry cfq_slice_async_rq_entry = {
+	.attr = {.name = "slice_async_rq", .mode = S_IRUGO | S_IWUSR },
+	.show = cfq_slice_async_rq_show,
+	.store = cfq_slice_async_rq_store,
+};
+static struct cfq_fs_entry cfq_slice_idle_entry = {
+	.attr = {.name = "slice_idle", .mode = S_IRUGO | S_IWUSR },
+	.show = cfq_slice_idle_show,
+	.store = cfq_slice_idle_store,
 };
 static struct cfq_fs_entry cfq_key_type_entry = {
 	.attr = {.name = "key_type", .mode = S_IRUGO | S_IWUSR },
 	.show = cfq_read_key_type,
 	.store = cfq_set_key_type,
 };
+static struct cfq_fs_entry cfq_max_depth_entry = {
+	.attr = {.name = "max_depth", .mode = S_IRUGO | S_IWUSR },
+	.show = cfq_max_depth_show,
+	.store = cfq_max_depth_store,
+};
 
 static struct attribute *default_attrs[] = {
 	&cfq_quantum_entry.attr,
@@ -1791,7 +1916,11 @@
 	&cfq_find_best_entry.attr,
 	&cfq_back_max_entry.attr,
 	&cfq_back_penalty_entry.attr,
-	&cfq_clear_elapsed_entry.attr,
+	&cfq_slice_sync_entry.attr,
+	&cfq_slice_async_entry.attr,
+	&cfq_slice_async_rq_entry.attr,
+	&cfq_slice_idle_entry.attr,
+	&cfq_max_depth_entry.attr,
 	NULL,
 };
 
@@ -1856,7 +1985,7 @@
 	.elevator_owner =	THIS_MODULE,
 };
 
-int cfq_init(void)
+static int __init cfq_init(void)
 {
 	int ret;
 
@@ -1864,17 +1993,34 @@
 		return -ENOMEM;
 
 	ret = elv_register(&iosched_cfq);
-	if (!ret) {
-		__module_get(THIS_MODULE);
-		return 0;
-	}
+	if (ret)
+		cfq_slab_kill();
 
-	cfq_slab_kill();
 	return ret;
 }
 
 static void __exit cfq_exit(void)
 {
+	struct task_struct *g, *p;
+	unsigned long flags;
+
+	read_lock_irqsave(&tasklist_lock, flags);
+
+	/*
+	 * iterate each process in the system, removing our io_context
+	 */
+	do_each_thread(g, p) {
+		struct io_context *ioc = p->io_context;
+
+		if (ioc && ioc->cic) {
+			ioc->cic->exit(ioc->cic);
+			cfq_free_io_context(ioc->cic);
+			ioc->cic = NULL;
+		}
+	} while_each_thread(g, p);
+
+	read_unlock_irqrestore(&tasklist_lock, flags);
+
 	cfq_slab_kill();
 	elv_unregister(&iosched_cfq);
 }
===== drivers/block/ll_rw_blk.c 1.281 vs edited =====
--- 1.281/drivers/block/ll_rw_blk.c	2004-12-01 09:13:57 +01:00
+++ edited/drivers/block/ll_rw_blk.c	2004-12-03 13:34:28 +01:00
@@ -1257,11 +1257,7 @@
 	if (!blk_remove_plug(q))
 		return;
 
-	/*
-	 * was plugged, fire request_fn if queue has stuff to do
-	 */
-	if (elv_next_request(q))
-		q->request_fn(q);
+	q->request_fn(q);
 }
 EXPORT_SYMBOL(__generic_unplug_device);
 
@@ -2152,7 +2148,6 @@
 		return;
 
 	req->rq_status = RQ_INACTIVE;
-	req->q = NULL;
 	req->rl = NULL;
 
 	/*
@@ -2502,6 +2497,7 @@
 {
 	struct request_list *rl = &q->rq;
 	struct request *rq;
+	int requeued = 0;
 
 	spin_lock_irq(q->queue_lock);
 	clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
@@ -2510,8 +2506,12 @@
 		rq = list_entry_rq(q->drain_list.next);
 
 		list_del_init(&rq->queuelist);
-		__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
+		elv_requeue_request(q, rq);
+		requeued++;
 	}
+
+	if (requeued)
+		q->request_fn(q);
 
 	spin_unlock_irq(q->queue_lock);
 


-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-04 10:49 [PATCH] Time sliced CFQ #2 Jens Axboe
@ 2004-12-04 16:39 ` Jeff Sipek
  2004-12-05 18:58   ` Jens Axboe
  2004-12-05 14:21 ` Ed Tomlinson
  2004-12-06  9:31 ` Prakash K. Cheemplavam
  2 siblings, 1 reply; 30+ messages in thread
From: Jeff Sipek @ 2004-12-04 16:39 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Linux Kernel

[-- Attachment #1: Type: text/plain, Size: 1090 bytes --]

On Sat, Dec 04, 2004 at 11:49:21AM +0100, Jens Axboe wrote:
> Hi,
> 
> Second version of the time sliced CFQ. Changes:
> 
> - Sync io has a fixed time slice like before, async io has both a time
>   based and a request based slice limit. The queue slice is expired when
>   one of these limits are reached.
> 
> - Fix a bug in invoking the request handler on a plugged queue.
> 
> - Drop the ->alloc_limit wakeup stuff, I'm not so sure it's a good idea
>   and there are probably wakeup races buried there.
> 
> With the async rq slice limit, it behaves perfectly here for me with
> readers competing with async writers. The main slice settings for a
> queue are:
> 
> - slice_sync: How many msec a sync disk slice lasts
> - slice_idle: How long a sync slice is allowed to idle
> - slice_async: How many msec an async disk slice lasts
> - slice_async_rq: How many requests an async disk slice lasts

This looks very nice. And from your previous post (with version #1) it
would look like you made my attempt at io priorities easier. We'll see
;-)

Thanks,
Jeff.

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-04 10:49 [PATCH] Time sliced CFQ #2 Jens Axboe
  2004-12-04 16:39 ` Jeff Sipek
@ 2004-12-05 14:21 ` Ed Tomlinson
  2004-12-05 15:18   ` Jens Axboe
  2004-12-06  9:31 ` Prakash K. Cheemplavam
  2 siblings, 1 reply; 30+ messages in thread
From: Ed Tomlinson @ 2004-12-05 14:21 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Linux Kernel

Jens,

Booting 10-rc3 with this patch applied hangs when I su from root
to my working user.  The same kernel without elevator=cfq works.  By
"hangs" I mean that the su does not complete, nor do logins to other
ids work.  The sysrq keys are still active.

Please let me know what other info will help.

Ed Tomlinson

On Saturday 04 December 2004 05:49, Jens Axboe wrote:
> Hi,
> 
> Second version of the time sliced CFQ. Changes:
> 
> > - Sync io has a fixed time slice like before, async io has both a time
> >   based and a request based slice limit. The queue slice is expired when
> >   one of these limits is reached.
> 
> - Fix a bug in invoking the request handler on a plugged queue.
> 
> - Drop the ->alloc_limit wakeup stuff, I'm not so sure it's a good idea
>   and there are probably wakeup races buried there.
> 
> With the async rq slice limit, it behaves perfectly here for me with
> readers competing with async writers. The main slice settings for a
> queue are:
> 
> - slice_sync: How many msec a sync disk slice lasts
> - slice_idle: How long a sync slice is allowed to idle
> - slice_async: How many msec an async disk slice lasts
> - slice_async_rq: How many requests an async disk slice lasts
> 
> > Interestingly, cfq is now about 10% faster on an fsck than deadline and
> > AS:
> 
> AS:
> 
> bart:~ # time fsck.ext2 -fy /dev/hdc1
> e2fsck 1.34 (25-Jul-2003)
> Pass 1: Checking inodes, blocks, and sizes
> Pass 2: Checking directory structure
> Pass 3: Checking directory connectivity
> Pass 4: Checking reference counts
> Pass 5: Checking group summary information
> /dev/hdc1: 36/3753600 files (8.3% non-contiguous), 644713/7504552 blocks
> 
> real    0m30.594s
> user    0m1.862s
> sys     0m5.214s
> 
> 
> DEADLINE:
> 
> bart:~ # time fsck.ext2 -fy /dev/hdc1
> e2fsck 1.34 (25-Jul-2003)
> Pass 1: Checking inodes, blocks, and sizes
> Pass 2: Checking directory structure
> Pass 3: Checking directory connectivity
> Pass 4: Checking reference counts
> Pass 5: Checking group summary information
> /dev/hdc1: 36/3753600 files (8.3% non-contiguous), 644713/7504552 blocks
> 
> real    0m30.475s
> user    0m1.855s
> sys     0m5.280s
> 
> 
> CFQ:
> 
> bart:~ # time fsck.ext2 -fy /dev/hdc1
> e2fsck 1.34 (25-Jul-2003)
> Pass 1: Checking inodes, blocks, and sizes
> Pass 2: Checking directory structure
> Pass 3: Checking directory connectivity
> Pass 4: Checking reference counts
> Pass 5: Checking group summary information
> /dev/hdc1: 36/3753600 files (8.3% non-contiguous), 644713/7504552 blocks
> 
> real    0m27.921s
> user    0m1.846s
> sys     0m5.648s
> 
> 
> Patch is against 2.6.10-rc3.
> 
> Signed-off-by: Jens Axboe <axboe@suse.de>
> 
> ===== drivers/block/cfq-iosched.c 1.15 vs edited =====
> --- 1.15/drivers/block/cfq-iosched.c 2004-11-30 07:56:58 +01:00
> +++ edited/drivers/block/cfq-iosched.c 2004-12-04 11:41:42 +01:00
> @@ -22,21 +22,24 @@
>  #include <linux/rbtree.h>
>  #include <linux/mempool.h>
>  
> -static unsigned long max_elapsed_crq;
> -static unsigned long max_elapsed_dispatch;
> -
>  /*
>   * tunables
>   */
>  static int cfq_quantum = 4;  /* max queue in one round of service */
>  static int cfq_queued = 8;  /* minimum rq allocate limit per-queue*/
> -static int cfq_service = HZ;  /* period over which service is avg */
>  static int cfq_fifo_expire_r = HZ / 2; /* fifo timeout for sync requests */
>  static int cfq_fifo_expire_w = 5 * HZ; /* fifo timeout for async requests */
>  static int cfq_fifo_rate = HZ / 8; /* fifo expiry rate */
>  static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */
>  static int cfq_back_penalty = 2; /* penalty of a backwards seek */
>  
> +static int cfq_slice_sync = HZ / 10;
> +static int cfq_slice_async = HZ / 25;
> +static int cfq_slice_async_rq = 8;
> +static int cfq_slice_idle = HZ / 249;
> +
> +static int cfq_max_depth = 4;
> +
>  /*
>   * for the hash of cfqq inside the cfqd
>   */
> @@ -55,6 +58,7 @@
>  #define list_entry_hash(ptr)	hlist_entry((ptr), struct cfq_rq, hash)
>  
>  #define list_entry_cfqq(ptr)	list_entry((ptr), struct cfq_queue, cfq_list)
> +#define list_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
>  
>  #define RQ_DATA(rq)		(rq)->elevator_private
>  
> @@ -76,22 +80,18 @@
>  #define rq_rb_key(rq)		(rq)->sector
>  
>  /*
> - * threshold for switching off non-tag accounting
> - */
> -#define CFQ_MAX_TAG		(4)
> -
> -/*
>   * sort key types and names
>   */
>  enum {
>  	CFQ_KEY_PGID,
>  	CFQ_KEY_TGID,
> +	CFQ_KEY_PID,
>  	CFQ_KEY_UID,
>  	CFQ_KEY_GID,
>  	CFQ_KEY_LAST,
>  };
>  
> -static char *cfq_key_types[] = { "pgid", "tgid", "uid", "gid", NULL };
> +static char *cfq_key_types[] = { "pgid", "tgid", "pid", "uid", "gid", NULL };
>  
>  /*
>   * spare queue
> @@ -103,6 +103,8 @@
>  static kmem_cache_t *cfq_ioc_pool;
>  
>  struct cfq_data {
> +	atomic_t ref;
> +
>  	struct list_head rr_list;
>  	struct list_head empty_list;
>  
> @@ -114,8 +116,6 @@
>  
>  	unsigned int max_queued;
>  
> -	atomic_t ref;
> -
>  	int key_type;
>  
>  	mempool_t *crq_pool;
> @@ -127,6 +127,14 @@
>  	int rq_in_driver;
>  
>  	/*
> +	 * schedule slice state info
> +	 */
> +	struct timer_list timer;
> +	struct work_struct unplug_work;
> +	struct cfq_queue *active_queue;
> +	unsigned int dispatch_slice;
> +
> +	/*
>  	 * tunables, see top of file
>  	 */
>  	unsigned int cfq_quantum;
> @@ -137,8 +145,10 @@
>  	unsigned int cfq_back_penalty;
>  	unsigned int cfq_back_max;
>  	unsigned int find_best_crq;
> -
> -	unsigned int cfq_tagged;
> +	unsigned int cfq_slice[2];
> +	unsigned int cfq_slice_async_rq;
> +	unsigned int cfq_slice_idle;
> +	unsigned int cfq_max_depth;
>  };
>  
>  struct cfq_queue {
> @@ -150,8 +160,6 @@
>  	struct hlist_node cfq_hash;
>  	/* hash key */
>  	unsigned long key;
> -	/* whether queue is on rr (or empty) list */
> -	int on_rr;
>  	/* on either rr or empty list of cfqd */
>  	struct list_head cfq_list;
>  	/* sorted list of pending requests */
> @@ -169,15 +177,17 @@
>  
>  	int key_type;
>  
> -	unsigned long service_start;
> -	unsigned long service_used;
> +	unsigned long slice_start;
> +	unsigned long slice_end;
> +	unsigned long service_last;
>  
> -	unsigned int max_rate;
> +	/* whether queue is on rr (or empty) list */
> +	unsigned int on_rr : 1;
> +	unsigned int wait_request : 1;
> +	unsigned int must_dispatch : 1;
>  
>  	/* number of requests that have been handed to the driver */
>  	int in_flight;
> -	/* number of currently allocated requests */
> -	int alloc_limit[2];
>  };
>  
>  struct cfq_rq {
> @@ -195,7 +205,6 @@
>  	unsigned int in_flight : 1;
>  	unsigned int accounted : 1;
>  	unsigned int is_sync   : 1;
> -	unsigned int is_write  : 1;
>  };
>  
>  static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned long);
> @@ -219,6 +228,8 @@
>  		default:
>  		case CFQ_KEY_TGID:
>  			return tsk->tgid;
> +		case CFQ_KEY_PID:
> +			return tsk->pid;
>  		case CFQ_KEY_UID:
>  			return tsk->uid;
>  		case CFQ_KEY_GID:
> @@ -406,67 +417,22 @@
>  		cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
>  }
>  
> -static int cfq_check_sort_rr_list(struct cfq_queue *cfqq)
> -{
> -	struct list_head *head = &cfqq->cfqd->rr_list;
> -	struct list_head *next, *prev;
> -
> -	/*
> -	 * list might still be ordered
> -	 */
> -	next = cfqq->cfq_list.next;
> -	if (next != head) {
> -		struct cfq_queue *cnext = list_entry_cfqq(next);
> -
> -		if (cfqq->service_used > cnext->service_used)
> -			return 1;
> -	}
> -
> -	prev = cfqq->cfq_list.prev;
> -	if (prev != head) {
> -		struct cfq_queue *cprev = list_entry_cfqq(prev);
> -
> -		if (cfqq->service_used < cprev->service_used)
> -			return 1;
> -	}
> -
> -	return 0;
> -}
> -
> -static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue)
> +static void cfq_resort_rr_list(struct cfq_queue *cfqq)
>  {
>  	struct list_head *entry = &cfqq->cfqd->rr_list;
>  
> -	if (!cfqq->on_rr)
> -		return;
> -	if (!new_queue && !cfq_check_sort_rr_list(cfqq))
> -		return;
> -
>  	list_del(&cfqq->cfq_list);
>  
>  	/*
> -	 * sort by our mean service_used, sub-sort by in-flight requests
> +	 * sort by when queue was last serviced
>  	 */
>  	while ((entry = entry->prev) != &cfqq->cfqd->rr_list) {
>  		struct cfq_queue *__cfqq = list_entry_cfqq(entry);
>  
> -		if (cfqq->service_used > __cfqq->service_used)
> +		if (!__cfqq->service_last)
> +			break;
> +		if (time_before(__cfqq->service_last, cfqq->service_last))
>  			break;
> -		else if (cfqq->service_used == __cfqq->service_used) {
> -			struct list_head *prv;
> -
> -			while ((prv = entry->prev) != &cfqq->cfqd->rr_list) {
> -				__cfqq = list_entry_cfqq(prv);
> -
> -				WARN_ON(__cfqq->service_used > cfqq->service_used);
> -				if (cfqq->service_used != __cfqq->service_used)
> -					break;
> -				if (cfqq->in_flight > __cfqq->in_flight)
> -					break;
> -
> -				entry = prv;
> -			}
> -		}
>  	}
>  
>  	list_add(&cfqq->cfq_list, entry);
> @@ -479,16 +445,12 @@
>  static inline void
>  cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
>  {
> -	/*
> -	 * it's currently on the empty list
> -	 */
> -	cfqq->on_rr = 1;
> -	cfqd->busy_queues++;
> +	BUG_ON(cfqq->on_rr);
>  
> -	if (time_after(jiffies, cfqq->service_start + cfq_service))
> -		cfqq->service_used >>= 3;
> +	cfqd->busy_queues++;
> +	cfqq->on_rr = 1;
>  
> -	cfq_sort_rr_list(cfqq, 1);
> +	cfq_resort_rr_list(cfqq);
>  }
>  
>  static inline void
> @@ -512,10 +474,10 @@
>  		struct cfq_data *cfqd = cfqq->cfqd;
>  
>  		BUG_ON(!cfqq->queued[crq->is_sync]);
> +		cfqq->queued[crq->is_sync]--;
>  
>  		cfq_update_next_crq(crq);
>  
> -		cfqq->queued[crq->is_sync]--;
>  		rb_erase(&crq->rb_node, &cfqq->sort_list);
>  		RB_CLEAR_COLOR(&crq->rb_node);
>  
> @@ -622,11 +584,6 @@
>  	if (crq) {
>  		struct cfq_queue *cfqq = crq->cfq_queue;
>  
> -		if (cfqq->cfqd->cfq_tagged) {
> -			cfqq->service_used--;
> -			cfq_sort_rr_list(cfqq, 0);
> -		}
> -
>  		crq->accounted = 0;
>  		cfqq->cfqd->rq_in_driver--;
>  	}
> @@ -640,9 +597,7 @@
>  	if (crq) {
>  		cfq_remove_merge_hints(q, crq);
>  		list_del_init(&rq->queuelist);
> -
> -		if (crq->cfq_queue)
> -			cfq_del_crq_rb(crq);
> +		cfq_del_crq_rb(crq);
>  	}
>  }
>  
> @@ -724,6 +679,99 @@
>  }
>  
>  /*
> + * current cfqq expired its slice (or was too idle), select new one
> + */
> +static inline void cfq_slice_expired(struct cfq_data *cfqd)
> +{
> +	struct cfq_queue *cfqq = cfqd->active_queue;
> +	unsigned long now = jiffies;
> +
> +	if (cfqq) {
> +		if (cfqq->wait_request)
> +			del_timer(&cfqd->timer);
> +
> +		cfqq->service_last = now;
> +		cfqq->must_dispatch = 0;
> +		cfqq->wait_request = 0;
> +
> +		if (cfqq->on_rr)
> +			cfq_resort_rr_list(cfqq);
> +
> +		cfqq = NULL;
> +	}
> +
> +	if (!list_empty(&cfqd->rr_list)) {
> +		cfqq = list_entry_cfqq(cfqd->rr_list.next);
> +
> +		cfqq->slice_start = now;
> +		cfqq->slice_end = 0;
> +		cfqq->wait_request = 0;
> +	}
> +
> +	cfqd->active_queue = cfqq;
> +	cfqd->dispatch_slice = 0;
> +}
> +
> +static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
> +{
> +	WARN_ON(!RB_EMPTY(&cfqq->sort_list));
> +
> +	cfqq->wait_request = 1;
> +
> +	if (!cfqd->cfq_slice_idle)
> +		return 0;
> +
> +	if (!timer_pending(&cfqd->timer)) {
> +		unsigned long now = jiffies, slice_left;
> +
> +		slice_left = cfqq->slice_end - now;
> +		cfqd->timer.expires = now + min(cfqd->cfq_slice_idle, (unsigned int)slice_left);
> +		add_timer(&cfqd->timer);
> +	}
> +
> +	return 1;
> +}
> +
> +/*
> + * get next queue for service
> + */
> +static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
> +{
> +	struct cfq_queue *cfqq = cfqd->active_queue;
> +	unsigned long now = jiffies;
> +
> +	cfqq = cfqd->active_queue;
> +	if (!cfqq)
> +		goto new_queue;
> +
> +	if (cfqq->must_dispatch)
> +		goto must_queue;
> +
> +	/*
> +	 * slice has expired
> +	 */
> +	if (time_after(jiffies, cfqq->slice_end))
> +		goto new_queue;
> +
> +	/*
> +	 * if queue has requests, dispatch one. if not, check if
> +	 * enough slice is left to wait for one
> +	 */
> +must_queue:
> +	if (!RB_EMPTY(&cfqq->sort_list))
> +		goto keep_queue;
> +	else if (cfqq->slice_end - now >= cfqd->cfq_slice_idle) {
> +		if (cfq_arm_slice_timer(cfqd, cfqq))
> +			return NULL;
> +	}
> +
> +new_queue:
> +	cfq_slice_expired(cfqd);
> +keep_queue:
> +	return cfqd->active_queue;
> +}
> +
> +/*
>   * we dispatch cfqd->cfq_quantum requests in total from the rr_list queues,
>   * this function sector sorts the selected request to minimize seeks. we start
>   * at cfqd->last_sector, not 0.
> @@ -741,9 +789,7 @@
>  	list_del(&crq->request->queuelist);
>  
>  	last = cfqd->last_sector;
> -	while ((entry = entry->prev) != head) {
> -		__rq = list_entry_rq(entry);
> -
> +	list_for_each_entry_reverse(__rq, head, queuelist) {
>  		if (blk_barrier_rq(crq->request))
>  			break;
>  		if (!blk_fs_request(crq->request))
> @@ -777,95 +823,100 @@
>  	if (time_before(now, cfqq->last_fifo_expire + cfqd->cfq_fifo_batch_expire))
>  		return NULL;
>  
> -	crq = RQ_DATA(list_entry(cfqq->fifo[0].next, struct request, queuelist));
> -	if (reads && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) {
> -		cfqq->last_fifo_expire = now;
> -		return crq;
> +	if (reads) {
> +		crq = RQ_DATA(list_entry_fifo(cfqq->fifo[READ].next));
> +		if (time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) {
> +			cfqq->last_fifo_expire = now;
> +			return crq;
> +		}
>  	}
>  
> -	crq = RQ_DATA(list_entry(cfqq->fifo[1].next, struct request, queuelist));
> -	if (writes && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) {
> -		cfqq->last_fifo_expire = now;
> -		return crq;
> +	if (writes) {
> +		crq = RQ_DATA(list_entry_fifo(cfqq->fifo[WRITE].next));
> +		if (time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) {
> +			cfqq->last_fifo_expire = now;
> +			return crq;
> +		}
>  	}
>  
>  	return NULL;
>  }
>  
> -/*
> - * dispatch a single request from given queue
> - */
> -static inline void
> -cfq_dispatch_request(request_queue_t *q, struct cfq_data *cfqd,
> -		     struct cfq_queue *cfqq)
> +static int
> +__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
> +			int max_dispatch)
>  {
> -	struct cfq_rq *crq;
> +	int dispatched = 0, sync = 0;
> +
> +	BUG_ON(RB_EMPTY(&cfqq->sort_list));
> +
> +	do {
> +		struct cfq_rq *crq;
> +
> +		/*
> +		 * follow expired path, else get first next available
> +		 */
> +		if ((crq = cfq_check_fifo(cfqq)) == NULL) {
> +			if (cfqd->find_best_crq)
> +				crq = cfqq->next_crq;
> +			else
> +				crq = rb_entry_crq(rb_first(&cfqq->sort_list));
> +		}
> +
> +		cfqd->last_sector = crq->request->sector + crq->request->nr_sectors;
> +
> +		/*
> +		 * finally, insert request into driver list
> +		 */
> +		cfq_dispatch_sort(cfqd->queue, crq);
> +
> +		cfqd->dispatch_slice++;
> +		dispatched++;
> +		sync += crq->is_sync;
> +
> +		if (RB_EMPTY(&cfqq->sort_list))
> +			break;
> +
> +	} while (dispatched < max_dispatch);
>  
>  	/*
> -	 * follow expired path, else get first next available
> +	 * if slice end isn't set yet, set it. if at least one request was
> +	 * sync, use the sync time slice value
>  	 */
> -	if ((crq = cfq_check_fifo(cfqq)) == NULL) {
> -		if (cfqd->find_best_crq)
> -			crq = cfqq->next_crq;
> -		else
> -			crq = rb_entry_crq(rb_first(&cfqq->sort_list));
> -	}
> -
> -	cfqd->last_sector = crq->request->sector + crq->request->nr_sectors;
> +	if (!cfqq->slice_end)
> +		cfqq->slice_end = cfqd->cfq_slice[!!sync] + jiffies;
>  
>  	/*
> -	 * finally, insert request into driver list
> +	 * expire an async queue immediately if it has used up its tq slice
>  	 */
> -	cfq_dispatch_sort(q, crq);
> +	if (!sync && cfqd->dispatch_slice >= cfqd->cfq_slice_async_rq)
> +		cfq_slice_expired(cfqd);
> +
> +	return dispatched;
>  }
>  
>  static int cfq_dispatch_requests(request_queue_t *q, int max_dispatch)
>  {
>  	struct cfq_data *cfqd = q->elevator->elevator_data;
>  	struct cfq_queue *cfqq;
> -	struct list_head *entry, *tmp;
> -	int queued, busy_queues, first_round;
>  
>  	if (list_empty(&cfqd->rr_list))
>  		return 0;
>  
> -	queued = 0;
> -	first_round = 1;
> -restart:
> -	busy_queues = 0;
> -	list_for_each_safe(entry, tmp, &cfqd->rr_list) {
> -		cfqq = list_entry_cfqq(entry);
> -
> -		BUG_ON(RB_EMPTY(&cfqq->sort_list));
> -
> -		/*
> -		 * first round of queueing, only select from queues that
> -		 * don't already have io in-flight
> -		 */
> -		if (first_round && cfqq->in_flight)
> -			continue;
> -
> -		cfq_dispatch_request(q, cfqd, cfqq);
> -
> -		if (!RB_EMPTY(&cfqq->sort_list))
> -			busy_queues++;
> -
> -		queued++;
> -	}
> -
> -	if ((queued < max_dispatch) && (busy_queues || first_round)) {
> -		first_round = 0;
> -		goto restart;
> -	}
> +	cfqq = cfq_select_queue(cfqd);
> +	if (!cfqq)
> +		return 0;
>  
> -	return queued;
> +	cfqq->wait_request = 0;
> +	cfqq->must_dispatch = 0;
> +	del_timer(&cfqd->timer);
> +	return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
>  }
>  
>  static inline void cfq_account_dispatch(struct cfq_rq *crq)
>  {
>  	struct cfq_queue *cfqq = crq->cfq_queue;
>  	struct cfq_data *cfqd = cfqq->cfqd;
> -	unsigned long now, elapsed;
>  
>  	/*
>  	 * accounted bit is necessary since some drivers will call
> @@ -874,37 +925,9 @@
>  	if (crq->accounted)
>  		return;
>  
> -	now = jiffies;
> -	if (cfqq->service_start == ~0UL)
> -		cfqq->service_start = now;
> -
> -	/*
> -	 * on drives with tagged command queueing, command turn-around time
> -	 * doesn't necessarily reflect the time spent processing this very
> -	 * command inside the drive. so do the accounting differently there,
> -	 * by just sorting on the number of requests
> -	 */
> -	if (cfqd->cfq_tagged) {
> -		if (time_after(now, cfqq->service_start + cfq_service)) {
> -			cfqq->service_start = now;
> -			cfqq->service_used /= 10;
> -		}
> -
> -		cfqq->service_used++;
> -		cfq_sort_rr_list(cfqq, 0);
> -	}
> -
> -	elapsed = now - crq->queue_start;
> -	if (elapsed > max_elapsed_dispatch)
> -		max_elapsed_dispatch = elapsed;
> -
>  	crq->accounted = 1;
> -	crq->service_start = now;
> -
> -	if (++cfqd->rq_in_driver >= CFQ_MAX_TAG && !cfqd->cfq_tagged) {
> -		cfqq->cfqd->cfq_tagged = 1;
> -		printk("cfq: depth %d reached, tagging now on\n", CFQ_MAX_TAG);
> -	}
> +	crq->service_start = jiffies;
> +	cfqd->rq_in_driver++;
>  }
>  
>  static inline void
> @@ -915,21 +938,18 @@
>  	WARN_ON(!cfqd->rq_in_driver);
>  	cfqd->rq_in_driver--;
>  
> -	if (!cfqd->cfq_tagged) {
> -		unsigned long now = jiffies;
> -		unsigned long duration = now - crq->service_start;
> -
> -		if (time_after(now, cfqq->service_start + cfq_service)) {
> -			cfqq->service_start = now;
> -			cfqq->service_used >>= 3;
> -		}
> -
> -		cfqq->service_used += duration;
> -		cfq_sort_rr_list(cfqq, 0);
> +	/*
> +	 * queue was preempted while this request was servicing
> +	 */
> +	if (cfqd->active_queue != cfqq)
> +		return;
>  
> -		if (duration > max_elapsed_crq)
> -			max_elapsed_crq = duration;
> -	}
> +	/*
> +	 * no requests. if last request was a sync request, wait for
> +	 * a new one.
> +	 */
> +	if (RB_EMPTY(&cfqq->sort_list) && crq->is_sync)
> +		cfq_arm_slice_timer(cfqd, cfqq);
>  }
>  
>  static struct request *cfq_next_request(request_queue_t *q)
> @@ -937,6 +957,9 @@
>  	struct cfq_data *cfqd = q->elevator->elevator_data;
>  	struct request *rq;
>  
> +	if (cfqd->rq_in_driver >= cfqd->cfq_max_depth)
> +		return NULL;
> +
>  	if (!list_empty(&q->queue_head)) {
>  		struct cfq_rq *crq;
>  dispatch:
> @@ -964,6 +987,8 @@
>   */
>  static void cfq_put_queue(struct cfq_queue *cfqq)
>  {
> +	struct cfq_data *cfqd = cfqq->cfqd;
> +
>  	BUG_ON(!atomic_read(&cfqq->ref));
>  
>  	if (!atomic_dec_and_test(&cfqq->ref))
> @@ -972,6 +997,9 @@
>  	BUG_ON(rb_first(&cfqq->sort_list));
>  	BUG_ON(cfqq->on_rr);
>  
> +	if (unlikely(cfqd->active_queue == cfqq))
> +		cfqd->active_queue = NULL;
> +
>  	cfq_put_cfqd(cfqq->cfqd);
>  
>  	/*
> @@ -1117,6 +1145,7 @@
>  		cic->ioc = ioc;
>  		cic->cfqq = __cfqq;
>  		atomic_inc(&__cfqq->ref);
> +		atomic_inc(&cfqd->ref);
>  	} else {
>  		struct cfq_io_context *__cic;
>  		unsigned long flags;
> @@ -1159,10 +1188,10 @@
>  		__cic->ioc = ioc;
>  		__cic->cfqq = __cfqq;
>  		atomic_inc(&__cfqq->ref);
> +		atomic_inc(&cfqd->ref);
>  		spin_lock_irqsave(&ioc->lock, flags);
>  		list_add(&__cic->list, &cic->list);
>  		spin_unlock_irqrestore(&ioc->lock, flags);
> -
>  		cic = __cic;
>  		*cfqq = __cfqq;
>  	}
> @@ -1199,8 +1228,11 @@
>  			new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
>  			spin_lock_irq(cfqd->queue->queue_lock);
>  			goto retry;
> -		} else
> -			goto out;
> +		} else {
> +			cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
> +			if (!cfqq)
> +				goto out;
> +		}
>  
>  		memset(cfqq, 0, sizeof(*cfqq));
>  
> @@ -1216,7 +1248,7 @@
>  		cfqq->cfqd = cfqd;
>  		atomic_inc(&cfqd->ref);
>  		cfqq->key_type = cfqd->key_type;
> -		cfqq->service_start = ~0UL;
> +		cfqq->service_last = 0;
>  	}
>  
>  	if (new_cfqq)
> @@ -1243,14 +1275,31 @@
>  
>  static void cfq_enqueue(struct cfq_data *cfqd, struct cfq_rq *crq)
>  {
> -	crq->is_sync = 0;
> -	if (rq_data_dir(crq->request) == READ || current->flags & PF_SYNCWRITE)
> -		crq->is_sync = 1;
> +	struct cfq_queue *cfqq = crq->cfq_queue;
> +	struct request *rq = crq->request;
> +
> +	crq->is_sync = rq_data_dir(rq) == READ || current->flags & PF_SYNCWRITE;
>  
>  	cfq_add_crq_rb(crq);
>  	crq->queue_start = jiffies;
>  
> -	list_add_tail(&crq->request->queuelist, &crq->cfq_queue->fifo[crq->is_sync]);
> +	list_add_tail(&rq->queuelist, &cfqq->fifo[crq->is_sync]);
> +
> +	/*
> +	 * if we are waiting for a request for this queue, let it rip
> +	 * immediately and flag that we must not expire this queue just now
> +	 */
> +	if (cfqq->wait_request && cfqq == cfqd->active_queue) {
> +		request_queue_t *q = cfqd->queue;
> +
> +		cfqq->must_dispatch = 1;
> +		del_timer(&cfqd->timer);
> +
> +		if (!blk_queue_plugged(q))
> +			q->request_fn(q);
> +		else
> +			__generic_unplug_device(q);
> +	}
>  }
>  
>  static void
> @@ -1339,32 +1388,31 @@
>  	struct cfq_data *cfqd = q->elevator->elevator_data;
>  	struct cfq_queue *cfqq;
>  	int ret = ELV_MQUEUE_MAY;
> +	int limit;
>  
>  	if (current->flags & PF_MEMALLOC)
>  		return ELV_MQUEUE_MAY;
>  
>  	cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(cfqd, current));
> -	if (cfqq) {
> -		int limit = cfqd->max_queued;
> -
> -		if (cfqq->allocated[rw] < cfqd->cfq_queued)
> -			return ELV_MQUEUE_MUST;
> -
> -		if (cfqd->busy_queues)
> -			limit = q->nr_requests / cfqd->busy_queues;
> -
> -		if (limit < cfqd->cfq_queued)
> -			limit = cfqd->cfq_queued;
> -		else if (limit > cfqd->max_queued)
> -			limit = cfqd->max_queued;
> +	if (unlikely(!cfqq))
> +		return ELV_MQUEUE_MAY;
>  
> -		if (cfqq->allocated[rw] >= limit) {
> -			if (limit > cfqq->alloc_limit[rw])
> -				cfqq->alloc_limit[rw] = limit;
> +	if (cfqq->allocated[rw] < cfqd->cfq_queued)
> +		return ELV_MQUEUE_MUST;
> +	if (cfqq->wait_request)
> +		return ELV_MQUEUE_MUST;
> +
> +	limit = cfqd->max_queued;
> +	if (cfqd->busy_queues)
> +		limit = q->nr_requests / cfqd->busy_queues;
> +
> +	if (limit < cfqd->cfq_queued)
> +		limit = cfqd->cfq_queued;
> +	else if (limit > cfqd->max_queued)
> +		limit = cfqd->max_queued;
>  
> -			ret = ELV_MQUEUE_NO;
> -		}
> -	}
> +	if (cfqq->allocated[rw] >= limit)
> +		ret = ELV_MQUEUE_NO;
>  
>  	return ret;
>  }
> @@ -1372,12 +1420,13 @@
>  static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq)
>  {
>  	struct request_list *rl = &q->rq;
> -	const int write = waitqueue_active(&rl->wait[WRITE]);
> -	const int read = waitqueue_active(&rl->wait[READ]);
> +	const int writes = waitqueue_active(&rl->wait[WRITE]);
> +	const int reads = waitqueue_active(&rl->wait[READ]);
> +	struct cfq_data *cfqd = q->elevator->elevator_data;
>  
> -	if (read && cfqq->allocated[READ] < cfqq->alloc_limit[READ])
> +	if (reads && cfqq->allocated[READ] < cfqd->max_queued)
>  		wake_up(&rl->wait[READ]);
> -	if (write && cfqq->allocated[WRITE] < cfqq->alloc_limit[WRITE])
> +	if (writes && cfqq->allocated[WRITE] < cfqd->max_queued)
>  		wake_up(&rl->wait[WRITE]);
>  }
>  
> @@ -1391,16 +1440,17 @@
>  
>  	if (crq) {
>  		struct cfq_queue *cfqq = crq->cfq_queue;
> +		const int rw = rq_data_dir(rq);
>  
>  		BUG_ON(q->last_merge == rq);
>  		BUG_ON(!hlist_unhashed(&crq->hash));
>  
> +		BUG_ON(!cfqq->allocated[rw]);
> +		cfqq->allocated[rw]--;
> +
>  		if (crq->io_context)
>  			put_io_context(crq->io_context->ioc);
>  
> -		BUG_ON(!cfqq->allocated[crq->is_write]);
> -		cfqq->allocated[crq->is_write]--;
> -
>  		mempool_free(crq, cfqd->crq_pool);
>  		rq->elevator_private = NULL;
>  
> @@ -1470,9 +1520,7 @@
>  		crq->io_context = cic;
>  		crq->service_start = crq->queue_start = 0;
>  		crq->in_flight = crq->accounted = crq->is_sync = 0;
> -		crq->is_write = rw;
>  		rq->elevator_private = crq;
> -		cfqq->alloc_limit[rw] = 0;
>  		return 0;
>  	}
>  
> @@ -1486,6 +1534,44 @@
>  	return 1;
>  }
>  
> +static void cfq_kick_queue(void *data)
> +{
> +	request_queue_t *q = data;
> +
> +	blk_run_queue(q);
> +}
> +
> +static void cfq_schedule_timer(unsigned long data)
> +{
> +	struct cfq_data *cfqd = (struct cfq_data *) data;
> +	struct cfq_queue *cfqq;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(cfqd->queue->queue_lock, flags);
> +
> +	if ((cfqq = cfqd->active_queue) != NULL) {
> +		/*
> +		 * expired
> +		 */
> +		if (time_after(jiffies, cfqq->slice_end))
> +			goto out;
> +
> +		/*
> +		 * not expired and it has a request pending, let it dispatch
> +		 */
> +		if (!RB_EMPTY(&cfqq->sort_list)) {
> +			cfqq->must_dispatch = 1;
> +			goto out_cont;
> +		}
> +	} 
> +
> +out:
> +	cfq_slice_expired(cfqd);
> +out_cont:
> +	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
> +	kblockd_schedule_work(&cfqd->unplug_work);
> +}
> +
>  static void cfq_put_cfqd(struct cfq_data *cfqd)
>  {
>  	request_queue_t *q = cfqd->queue;
> @@ -1494,6 +1580,8 @@
>  	if (!atomic_dec_and_test(&cfqd->ref))
>  		return;
>  
> +	blk_sync_queue(q);
> +
>  	/*
>  	 * kill spare queue, getting it means we have two refences to it.
>  	 * drop both
> @@ -1565,10 +1653,17 @@
>  	 * some requests. fairness is handled differently
>  	 */
>  	q->nr_requests = 1024;
> -	cfqd->max_queued = q->nr_requests / 16;
> +	cfqd->max_queued = q->nr_requests / 8;
>  	q->nr_batching = cfq_queued;
> -	cfqd->key_type = CFQ_KEY_TGID;
> +	cfqd->key_type = CFQ_KEY_PID;
>  	cfqd->find_best_crq = 1;
> +
> +	init_timer(&cfqd->timer);
> +	cfqd->timer.function = cfq_schedule_timer;
> +	cfqd->timer.data = (unsigned long) cfqd;
> +
> +	INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);
> +
>  	atomic_set(&cfqd->ref, 1);
>  
>  	cfqd->cfq_queued = cfq_queued;
> @@ -1578,6 +1673,11 @@
>  	cfqd->cfq_fifo_batch_expire = cfq_fifo_rate;
>  	cfqd->cfq_back_max = cfq_back_max;
>  	cfqd->cfq_back_penalty = cfq_back_penalty;
> +	cfqd->cfq_slice[0] = cfq_slice_async;
> +	cfqd->cfq_slice[1] = cfq_slice_sync;
> +	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
> +	cfqd->cfq_slice_idle = cfq_slice_idle;
> +	cfqd->cfq_max_depth = cfq_max_depth;
>  
>  	return 0;
>  out_spare:
> @@ -1624,7 +1724,6 @@
>  	return -ENOMEM;
>  }
>  
> -
>  /*
>   * sysfs parts below -->
>   */
> @@ -1650,13 +1749,6 @@
>  }
>  
>  static ssize_t
> -cfq_clear_elapsed(struct cfq_data *cfqd, const char *page, size_t count)
> -{
> -	max_elapsed_dispatch = max_elapsed_crq = 0;
> -	return count;
> -}
> -
> -static ssize_t
>  cfq_set_key_type(struct cfq_data *cfqd, const char *page, size_t count)
>  {
>  	spin_lock_irq(cfqd->queue->queue_lock);
> @@ -1664,6 +1756,8 @@
>  		cfqd->key_type = CFQ_KEY_PGID;
>  	else if (!strncmp(page, "tgid", 4))
>  		cfqd->key_type = CFQ_KEY_TGID;
> +	else if (!strncmp(page, "pid", 3))
> +		cfqd->key_type = CFQ_KEY_PID;
>  	else if (!strncmp(page, "uid", 3))
>  		cfqd->key_type = CFQ_KEY_UID;
>  	else if (!strncmp(page, "gid", 3))
> @@ -1704,6 +1798,11 @@
>  SHOW_FUNCTION(cfq_find_best_show, cfqd->find_best_crq, 0);
>  SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0);
>  SHOW_FUNCTION(cfq_back_penalty_show, cfqd->cfq_back_penalty, 0);
> +SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
> +SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
> +SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
> +SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
> +SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0);
>  #undef SHOW_FUNCTION
>  
>  #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
> @@ -1729,6 +1828,11 @@
>  STORE_FUNCTION(cfq_find_best_store, &cfqd->find_best_crq, 0, 1, 0);
>  STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
>  STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0);
> +STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
> +STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
> +STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
> +STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0);
> +STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 2, UINT_MAX, 0);
>  #undef STORE_FUNCTION
>  
>  static struct cfq_fs_entry cfq_quantum_entry = {
> @@ -1771,15 +1875,36 @@
>  	.show = cfq_back_penalty_show,
>  	.store = cfq_back_penalty_store,
>  };
> -static struct cfq_fs_entry cfq_clear_elapsed_entry = {
> -	.attr = {.name = "clear_elapsed", .mode = S_IWUSR },
> -	.store = cfq_clear_elapsed,
> +static struct cfq_fs_entry cfq_slice_sync_entry = {
> +	.attr = {.name = "slice_sync", .mode = S_IRUGO | S_IWUSR },
> +	.show = cfq_slice_sync_show,
> +	.store = cfq_slice_sync_store,
> +};
> +static struct cfq_fs_entry cfq_slice_async_entry = {
> +	.attr = {.name = "slice_async", .mode = S_IRUGO | S_IWUSR },
> +	.show = cfq_slice_async_show,
> +	.store = cfq_slice_async_store,
> +};
> +static struct cfq_fs_entry cfq_slice_async_rq_entry = {
> +	.attr = {.name = "slice_async_rq", .mode = S_IRUGO | S_IWUSR },
> +	.show = cfq_slice_async_rq_show,
> +	.store = cfq_slice_async_rq_store,
> +};
> +static struct cfq_fs_entry cfq_slice_idle_entry = {
> +	.attr = {.name = "slice_idle", .mode = S_IRUGO | S_IWUSR },
> +	.show = cfq_slice_idle_show,
> +	.store = cfq_slice_idle_store,
>  };
>  static struct cfq_fs_entry cfq_key_type_entry = {
>  	.attr = {.name = "key_type", .mode = S_IRUGO | S_IWUSR },
>  	.show = cfq_read_key_type,
>  	.store = cfq_set_key_type,
>  };
> +static struct cfq_fs_entry cfq_max_depth_entry = {
> +	.attr = {.name = "max_depth", .mode = S_IRUGO | S_IWUSR },
> +	.show = cfq_max_depth_show,
> +	.store = cfq_max_depth_store,
> +};
>  
>  static struct attribute *default_attrs[] = {
>  	&cfq_quantum_entry.attr,
> @@ -1791,7 +1916,11 @@
>  	&cfq_find_best_entry.attr,
>  	&cfq_back_max_entry.attr,
>  	&cfq_back_penalty_entry.attr,
> -	&cfq_clear_elapsed_entry.attr,
> +	&cfq_slice_sync_entry.attr,
> +	&cfq_slice_async_entry.attr,
> +	&cfq_slice_async_rq_entry.attr,
> +	&cfq_slice_idle_entry.attr,
> +	&cfq_max_depth_entry.attr,
>  	NULL,
>  };
>  
> @@ -1856,7 +1985,7 @@
>  	.elevator_owner =	THIS_MODULE,
>  };
>  
> -int cfq_init(void)
> +static int __init cfq_init(void)
>  {
>  	int ret;
>  
> @@ -1864,17 +1993,34 @@
>  		return -ENOMEM;
>  
>  	ret = elv_register(&iosched_cfq);
> -	if (!ret) {
> -		__module_get(THIS_MODULE);
> -		return 0;
> -	}
> +	if (ret)
> +		cfq_slab_kill();
>  
> -	cfq_slab_kill();
>  	return ret;
>  }
>  
>  static void __exit cfq_exit(void)
>  {
> +	struct task_struct *g, *p;
> +	unsigned long flags;
> +
> +	read_lock_irqsave(&tasklist_lock, flags);
> +
> +	/*
> +	 * iterate each process in the system, removing our io_context
> +	 */
> +	do_each_thread(g, p) {
> +		struct io_context *ioc = p->io_context;
> +
> +		if (ioc && ioc->cic) {
> +			ioc->cic->exit(ioc->cic);
> +			cfq_free_io_context(ioc->cic);
> +			ioc->cic = NULL;
> +		}
> +	} while_each_thread(g, p);
> +
> +	read_unlock_irqrestore(&tasklist_lock, flags);
> +
>  	cfq_slab_kill();
>  	elv_unregister(&iosched_cfq);
>  }
> ===== drivers/block/ll_rw_blk.c 1.281 vs edited =====
> --- 1.281/drivers/block/ll_rw_blk.c	2004-12-01 09:13:57 +01:00
> +++ edited/drivers/block/ll_rw_blk.c	2004-12-03 13:34:28 +01:00
> @@ -1257,11 +1257,7 @@
>  	if (!blk_remove_plug(q))
>  		return;
>  
> -	/*
> -	 * was plugged, fire request_fn if queue has stuff to do
> -	 */
> -	if (elv_next_request(q))
> -		q->request_fn(q);
> +	q->request_fn(q);
>  }
>  EXPORT_SYMBOL(__generic_unplug_device);
>  
> @@ -2152,7 +2148,6 @@
>  		return;
>  
>  	req->rq_status = RQ_INACTIVE;
> -	req->q = NULL;
>  	req->rl = NULL;
>  
>  	/*
> @@ -2502,6 +2497,7 @@
>  {
>  	struct request_list *rl = &q->rq;
>  	struct request *rq;
> +	int requeued = 0;
>  
>  	spin_lock_irq(q->queue_lock);
>  	clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
> @@ -2510,8 +2506,12 @@
>  		rq = list_entry_rq(q->drain_list.next);
>  
>  		list_del_init(&rq->queuelist);
> -		__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
> +		elv_requeue_request(q, rq);
> +		requeued++;
>  	}
> +
> +	if (requeued)
> +		q->request_fn(q);
>  
>  	spin_unlock_irq(q->queue_lock);
>  
> 
> 

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-05 14:21 ` Ed Tomlinson
@ 2004-12-05 15:18   ` Jens Axboe
  2004-12-05 17:58     ` Ed Tomlinson
  0 siblings, 1 reply; 30+ messages in thread
From: Jens Axboe @ 2004-12-05 15:18 UTC (permalink / raw)
  To: Ed Tomlinson; +Cc: Linux Kernel

On Sun, Dec 05 2004, Ed Tomlinson wrote:
> Jens,
> 
> Booting 10-rc3 with this patch applied hangs when I su from root
> to my working user.  Same kernel without elevator=cfq works.   By
> hangs I mean that the su does not complete nor do logins to other
> ids work.  The sysrq keys are active.  
> 
> Please let me know what other info will help.

What type of storage is involved, that's the main question?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-05 15:18   ` Jens Axboe
@ 2004-12-05 17:58     ` Ed Tomlinson
  0 siblings, 0 replies; 30+ messages in thread
From: Ed Tomlinson @ 2004-12-05 17:58 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Linux Kernel

On Sunday 05 December 2004 10:18, Jens Axboe wrote:
> On Sun, Dec 05 2004, Ed Tomlinson wrote:
> > Jens,
> > 
> > Booting 10-rc3 with this patch applied hangs when I su from root
> > to my working user.  Same kernel without elevator=cfq works.   By
> > hangs I mean that the su does not complete nor do logins to other
> > ids work.  The sysrq keys are active.  
> > 
> > Please let me know what other info will help.
> 
> What type of storage is involved, that's the main question?

IDE.  Just in case this is from the 10-rc3 boot with the default scheduler.  

Dec  5 09:25:17 bert kernel: Uniform Multi-Platform E-IDE driver Revision: 7.00alpha2
Dec  5 09:25:17 bert kernel: ide: Assuming 33MHz system bus speed for PIO modes; override with idebus=xx
Dec  5 09:25:17 bert kernel: PIIX4: IDE controller at PCI slot 0000:00:14.1
Dec  5 09:25:17 bert kernel: PIIX4: chipset revision 1
Dec  5 09:25:17 bert kernel: PIIX4: not 100%% native mode: will probe irqs later
Dec  5 09:25:17 bert kernel:     ide0: BM-DMA at 0x10e0-0x10e7, BIOS settings: hda:DMA, hdb:pio
Dec  5 09:25:17 bert kernel:     ide1: BM-DMA at 0x10e8-0x10ef, BIOS settings: hdc:DMA, hdd:DMA
Dec  5 09:25:17 bert kernel: hda: Maxtor 6Y080L0, ATA DISK drive
Dec  5 09:25:17 bert kernel: elevator: using anticipatory as default io scheduler
Dec  5 09:25:17 bert kernel: ide0 at 0x1f0-0x1f7,0x3f6 on irq 14
Dec  5 09:25:17 bert kernel: hdc: HL-DT-ST RW/DVD GCC-4480B, ATAPI CD/DVD-ROM drive
Dec  5 09:25:17 bert kernel: hdd: HP COLORADO 20GB, ATAPI TAPE drive
Dec  5 09:25:17 bert kernel: ide1 at 0x170-0x177,0x376 on irq 15

Using udma2.

Ed

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-04 16:39 ` Jeff Sipek
@ 2004-12-05 18:58   ` Jens Axboe
  2004-12-06  0:29     ` Jeff Sipek
  0 siblings, 1 reply; 30+ messages in thread
From: Jens Axboe @ 2004-12-05 18:58 UTC (permalink / raw)
  To: Jeff Sipek; +Cc: Linux Kernel

On Sat, Dec 04 2004, Jeff Sipek wrote:
> On Sat, Dec 04, 2004 at 11:49:21AM +0100, Jens Axboe wrote:
> > Hi,
> > 
> > Second version of the time sliced CFQ. Changes:
> > 
> > - Sync io has a fixed time slice like before, async io has both a time
> >   based and a request based slice limit. The queue slice is expired when
> >   one of these limits are reached.
> > 
> > - Fix a bug in invoking the request handler on a plugged queue.
> > 
> > - Drop the ->alloc_limit wakeup stuff, I'm not so sure it's a good idea
> >   and there are probably wakeup races buried there.
> > 
> > With the async rq slice limit, it behaves perfectly here for me with
> > readers competing with async writers. The main slice settings for a
> > queue are:
> > 
> > - slice_sync: How many msec a sync disk slice lasts
> > - slice_idle: How long a sync slice is allowed to idle
> > - slice_async: How many msec an async disk slice lasts
> > - slice_async_rq: How many requests an async disk slice lasts
> 
> This looks very nice. And from your previous post (with version #1) it
> would look like you made my attempt at io priorities easier. We'll see
> ;-)

It should be really easy to try some rudimentary prio io support - just
scale the time slice based on process priority. A few lines of code
change, and io priority now follows process cpu scheduler priority. To
work really well, the code probably needs a few more limits besides just
slice time.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-05 18:58   ` Jens Axboe
@ 2004-12-06  0:29     ` Jeff Sipek
  2004-12-06  1:59       ` Con Kolivas
  2004-12-06  7:13       ` Jens Axboe
  0 siblings, 2 replies; 30+ messages in thread
From: Jeff Sipek @ 2004-12-06  0:29 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Linux Kernel

[-- Attachment #1: Type: text/plain, Size: 679 bytes --]

On Sun, Dec 05, 2004 at 07:58:45PM +0100, Jens Axboe wrote:
> It should be really easy to try some rudimentary prio io support - just
> scale the time slice based on process priority. A few lines of code
> change, and io priority now follows process cpu scheduler priority. To
> work really well, the code probably needs a few more limits besides just
> slice time.

I started working on the rudimentary io prio code, and it got me thinking...
Why use the cpu scheduler priorities? Wouldn't it make more sense to add
io_prio to task_struct? This way you can have a process which you know needs
a lot of CPU but not as much io, or the other way around.

What do you think?

Jeff.

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06  0:29     ` Jeff Sipek
@ 2004-12-06  1:59       ` Con Kolivas
  2004-12-06  2:23         ` Jeff Sipek
  2004-12-06  7:13       ` Jens Axboe
  1 sibling, 1 reply; 30+ messages in thread
From: Con Kolivas @ 2004-12-06  1:59 UTC (permalink / raw)
  To: Jeff Sipek; +Cc: Jens Axboe, Linux Kernel

[-- Attachment #1: Type: text/plain, Size: 1403 bytes --]

Jeff Sipek wrote:
> On Sun, Dec 05, 2004 at 07:58:45PM +0100, Jens Axboe wrote:
> 
>>It should be really easy to try some rudimentary prio io support - just
>>scale the time slice based on process priority. A few lines of code
>>change, and io priority now follows process cpu scheduler priority. To
>>work really well, the code probably needs a few more limits besides just
>>slice time.
> 
> 
> I started working on the rudimentary io prio code, and it got me thinking...
> Why use the cpu scheduler priorities? Wouldn't it make more sense to add
> io_prio to task_struct? This way you can have a process which you know needs
> a lot of CPU but not as much io, or the other way around.

That is the design Jens' original ioprio code used, which we used in 
-ck for quite a while. What myself and -ck users found, though, was that 
being tied to cpu 'nice' meant that most tasks behaved pretty much as 
we'd expect based on one sys call.

I think what is ideal is to have both. First the ioprio should be set to 
what the cpu 'nice' level is as a sort of global "this is the priority 
of this task" setting. Then it should also support changing of this 
priority with a different call separate from the cpu nice. That way we 
can take into account access privileges of the caller making it 
impossible to set a high ioprio if the task itself is heavily niced by a 
superuser and so on.

Cheers,
Con

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 256 bytes --]

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06  1:59       ` Con Kolivas
@ 2004-12-06  2:23         ` Jeff Sipek
  2004-12-06  2:34           ` Con Kolivas
  0 siblings, 1 reply; 30+ messages in thread
From: Jeff Sipek @ 2004-12-06  2:23 UTC (permalink / raw)
  To: Con Kolivas; +Cc: Jens Axboe, Linux Kernel

[-- Attachment #1: Type: text/plain, Size: 1425 bytes --]

On Mon, Dec 06, 2004 at 12:59:43PM +1100, Con Kolivas wrote:
> Jeff Sipek wrote:
> >I started working on the rudimentary io prio code, and it got me 
> >thinking...
> >Why use the cpu scheduler priorities? Wouldn't it make more sense to add
> >io_prio to task_struct? This way you can have a process which you know 
> >needs
> >a lot of CPU but not as much io, or the other way around.
> 
> That is the design the Jens' original ioprio code used which we used in 
> -ck for quite a while. What myself and -ck users found, though, was that 
> being tied to cpu 'nice' meant that most tasks behaved pretty much as 
> we'd expect based on one sys call.
> 
> I think what is ideal is to have both.

Agreed.

> First the ioprio should be set to 
> what the cpu 'nice' level is as a sort of global "this is the priority 
> of this task" setting. Then it should also support changing of this 
> priority with a different call separate from the cpu nice. That way we 
> can take into account access privileges of the caller making it 
> impossible to set a high ioprio if the task itself is heavily niced by a 
> superuser and so on.

This sounds very reasonable. How would a situation like this one get
handled:

nice = x
io_prio = y

where x!=y

then, user changes nice. Does the nice level change alone? If so,
providing some "reset to nice==io_prio" capability would make sense, no?

Jeff.

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06  2:23         ` Jeff Sipek
@ 2004-12-06  2:34           ` Con Kolivas
  2004-12-06  5:00             ` Kyle Moffett
  0 siblings, 1 reply; 30+ messages in thread
From: Con Kolivas @ 2004-12-06  2:34 UTC (permalink / raw)
  To: Jeff Sipek; +Cc: Jens Axboe, Linux Kernel

[-- Attachment #1: Type: text/plain, Size: 1144 bytes --]

Jeff Sipek wrote:
> On Mon, Dec 06, 2004 at 12:59:43PM +1100, Con Kolivas wrote:
>>First the ioprio should be set to 
>>what the cpu 'nice' level is as a sort of global "this is the priority 
>>of this task" setting. Then it should also support changing of this 
>>priority with a different call separate from the cpu nice. That way we 
>>can take into account access privileges of the caller making it 
>>impossible to set a high ioprio if the task itself is heavily niced by a 
>>superuser and so on.
> 
> 
> This sounds very reasonable. How would a situation like this one get
> handled:
> 
> nice = x
> io_prio = y
> 
> where x!=y
> 
> then, user changes nice. Does the nice level change alone? If so,
> providing some "reset to nice==io_prio" capability would make sense, no?

I think when nice is changed, ioprio needs to be changed with it as a 
sane default action. I suspect that most of the time people will not use 
the separate ioprio call, but using 'nice' is a regular linuxy thing to 
do. Ideally we make ioprio part of the 'nice' utility and we specify 
both at the same time. Something like:
nice -n 5 -i 20 blah

Cheers,
Con

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 256 bytes --]

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06  2:34           ` Con Kolivas
@ 2004-12-06  5:00             ` Kyle Moffett
  2004-12-06  5:14               ` Robert Love
  2004-12-06  7:15               ` Jens Axboe
  0 siblings, 2 replies; 30+ messages in thread
From: Kyle Moffett @ 2004-12-06  5:00 UTC (permalink / raw)
  To: Con Kolivas; +Cc: Jeff Sipek, Linux Kernel, Jens Axboe

On Dec 05, 2004, at 21:34, Con Kolivas wrote:
> I think when nice is changed, ioprio needs to be changed with it as a 
> sane
> default action. I suspect that most of the time people will not use the
> separate ioprio call, but using 'nice' is a regular linuxy thing to 
> do. Ideally
> we make ioprio part of the 'nice' utility and we specify both at the 
> same time.
> Something like: "nice -n 5 -i 20 blah"

What about this:

nice = x;		/* -20 to 20 */
ioprio = y;		/* -40 to 40 */
effective_ioprio = clamp(x+y);	/* -20 to 20 */

This would allow tuning processes for unusual contrasts with the ioprio 
call.
On the other hand, it would allow us to just brute force "adjust" a 
process with
the nice command in the usual way without any changes to the "nice" 
source.

I also thought of a different effective ioprio calculation that scales
instead of clamping:

nice = x;		/* -20 to 20 */
ioprio = y;		/* -20 to 20 */
effective_ioprio = ((ioprio<0)?(20+nice):(20-nice))  *  abs(ioprio)/20;
			/* -20 to 20 */

Cheers,
Kyle Moffett

-----BEGIN GEEK CODE BLOCK-----
Version: 3.12
GCM/CS/IT/U d- s++: a18 C++++>$ UB/L/X/*++++(+)>$ P+++(++++)>$
L++++(+++) E W++(+) N+++(++) o? K? w--- O? M++ V? PS+() PE+(-) Y+
PGP+++ t+(+++) 5 X R? tv-(--) b++++(++) DI+ D+ G e->++++$ h!*()>++$ r  
!y?(-)
------END GEEK CODE BLOCK------



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06  5:00             ` Kyle Moffett
@ 2004-12-06  5:14               ` Robert Love
  2004-12-06  7:19                 ` Jens Axboe
  2004-12-06 12:21                 ` Kyle Moffett
  2004-12-06  7:15               ` Jens Axboe
  1 sibling, 2 replies; 30+ messages in thread
From: Robert Love @ 2004-12-06  5:14 UTC (permalink / raw)
  To: Kyle Moffett; +Cc: Con Kolivas, Jeff Sipek, Linux Kernel, Jens Axboe

On Mon, 2004-12-06 at 00:00 -0500, Kyle Moffett wrote:

> What about this:
> 
> nice = x;		/* -20 to 20 */
> ioprio = y;		/* -40 to 40 */
> effective_ioprio = clamp(x+y);	/* -20 to 20 */
> 
> This would allow tuning processes for unusual contrasts with the ioprio 
> call.
> On the other hand, it would allow us to just brute force "adjust" a 
> process with
> the nice command in the usual way without any changes to the "nice" 
> source.
> 
> I also thought of a different effective ioprio calculation that scales
> instead of clamping:

I think the complication of all of this demonstrates the overcomplexity.
I think we need to either

	(1) separate the two values.  we have a scheduling
	    priority (distributing the finite resource of
	    processor time) and an I/O priority (distributing
	    the finite resource of disk bandwidth).
	(2) just have a single value.

Personally, I prefer (1).  But (2) is fine.

What we want to do either way is cleanly separate the concepts in the
kernel.  That way we can decide what we actually expose to user-space.

	Robert Love



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06  0:29     ` Jeff Sipek
  2004-12-06  1:59       ` Con Kolivas
@ 2004-12-06  7:13       ` Jens Axboe
  1 sibling, 0 replies; 30+ messages in thread
From: Jens Axboe @ 2004-12-06  7:13 UTC (permalink / raw)
  To: Jeff Sipek; +Cc: Linux Kernel

On Sun, Dec 05 2004, Jeff Sipek wrote:
> On Sun, Dec 05, 2004 at 07:58:45PM +0100, Jens Axboe wrote:
> > It should be really easy to try some rudimentary prio io support - just
> > scale the time slice based on process priority. A few lines of code
> > change, and io priority now follows process cpu scheduler priority. To
> > work really well, the code probably needs a few more limits besides just
> > slice time.
> 
> I started working on the rudimentary io prio code, and it got me
> thinking...  Why use the cpu scheduler priorities? Wouldn't it make
> more sense to add io_prio to task_struct? This way you can have a
> process which you know needs a lot of CPU but not as much io, or the
> other way around.
> 
> What do you think?

I don't like tieing them together, see various threads in the list
archives for discussions about that. I just said that it would be easy
to test basic support this way, since you only have to change a few
lines.

I've already posted the glue code to set/query process priorities, I
would plan on just using something like that again.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06  5:00             ` Kyle Moffett
  2004-12-06  5:14               ` Robert Love
@ 2004-12-06  7:15               ` Jens Axboe
  1 sibling, 0 replies; 30+ messages in thread
From: Jens Axboe @ 2004-12-06  7:15 UTC (permalink / raw)
  To: Kyle Moffett; +Cc: Con Kolivas, Jeff Sipek, Linux Kernel

On Mon, Dec 06 2004, Kyle Moffett wrote:
> On Dec 05, 2004, at 21:34, Con Kolivas wrote:
> >I think when nice is changed, ioprio needs to be changed with it as a 
> >sane
> >default action. I suspect that most of the time people will not use the
> >separate ioprio call, but using 'nice' is a regular linuxy thing to 
> >do. Ideally
> >we make ioprio part of the 'nice' utility and we specify both at the 
> >same time.
> >Something like: "nice -n 5 -i 20 blah"
> 
> What about this:
> 
> nice = x;		/* -20 to 20 */
> ioprio = y;		/* -40 to 40 */
> effective_ioprio = clamp(x+y);	/* -20 to 20 */

That's way too many priority levels; there's no way on earth you can
QOS that finely something you don't have more control over (a hard
drive).

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06  5:14               ` Robert Love
@ 2004-12-06  7:19                 ` Jens Axboe
  2004-12-06 12:18                   ` Helge Hafting
  2004-12-06 12:21                 ` Kyle Moffett
  1 sibling, 1 reply; 30+ messages in thread
From: Jens Axboe @ 2004-12-06  7:19 UTC (permalink / raw)
  To: Robert Love; +Cc: Kyle Moffett, Con Kolivas, Jeff Sipek, Linux Kernel

On Mon, Dec 06 2004, Robert Love wrote:
> On Mon, 2004-12-06 at 00:00 -0500, Kyle Moffett wrote:
> 
> > What about this:
> > 
> > nice = x;		/* -20 to 20 */
> > ioprio = y;		/* -40 to 40 */
> > effective_ioprio = clamp(x+y);	/* -20 to 20 */
> > 
> > This would allow tuning processes for unusual contrasts with the ioprio 
> > call.
> > On the other hand, it would allow us to just brute force "adjust" a 
> > process with
> > the nice command in the usual way without any changes to the "nice" 
> > source.
> > 
> > I also thought of a different effective ioprio calculation that scales
> > instead of clamping:
> 
> I think the complication of all of this demonstrates the overcomplexity.
> I think we need to either
> 
> 	(1) separate the two values.  we have a scheduling
> 	    priority (distributing the finite resource of
> 	    processor time) and an I/O priority (distributing
> 	    the finite resource of disk bandwidth).
> 	(2) just have a single value.

They are inherently separate entities, I don't think mixing them up is a
good idea. IO priorities also includes things like attempting to
guarantee disk bandwidth, it isn't always just a 'nice' value.

But lets not get carried away in a pointless discussion. The actual
setting and query interface for io priorities is by far the smallest and
least critical piece of code :-)

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-04 10:49 [PATCH] Time sliced CFQ #2 Jens Axboe
  2004-12-04 16:39 ` Jeff Sipek
  2004-12-05 14:21 ` Ed Tomlinson
@ 2004-12-06  9:31 ` Prakash K. Cheemplavam
  2004-12-06  9:35   ` Jens Axboe
  2 siblings, 1 reply; 30+ messages in thread
From: Prakash K. Cheemplavam @ 2004-12-06  9:31 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Linux Kernel, Andrew Morton


[-- Attachment #1.1: Type: text/plain, Size: 545 bytes --]

Hi,

this one crapped out on me, while having heavy disk activity. (updating 
  gentoo portage tree - rebuilding metadata of it). Unfortunately I 
couldn't save the oops, as I had no hd access anymore and X would freeze 
a little later...(and I don't want to risk my data a second time...)

I think it had to do with preempt, or even preempt big kernel lock, as I 
could read something about it. I applied your patch to 2.6.10_rc3-ck1. I 
attached my config, if you want to try yourself with that kernel. cfq2 
runs w/o probs.

Cheers,

Prakash

[-- Attachment #1.2: .config --]
[-- Type: text/plain, Size: 32835 bytes --]

#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.10-rc3-ck1
# Sat Dec  4 06:35:59 2004
#
CONFIG_X86=y
CONFIG_MMU=y
CONFIG_UID16=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y

#
# Code maturity level options
#
CONFIG_EXPERIMENTAL=y
# CONFIG_CLEAN_COMPILE is not set
CONFIG_BROKEN=y
CONFIG_BROKEN_ON_SMP=y
CONFIG_LOCK_KERNEL=y

#
# General setup
#
CONFIG_LOCALVERSION=""
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
# CONFIG_BSD_PROCESS_ACCT_V3 is not set
CONFIG_SYSCTL=y
# CONFIG_AUDIT is not set
CONFIG_LOG_BUF_SHIFT=14
CONFIG_HOTPLUG=y
CONFIG_KOBJECT_UEVENT=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
# CONFIG_KALLSYMS_EXTRA_PASS is not set
CONFIG_FUTEX=y
CONFIG_EPOLL=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_SHMEM=y
CONFIG_CC_ALIGN_FUNCTIONS=0
CONFIG_CC_ALIGN_LABELS=0
CONFIG_CC_ALIGN_LOOPS=0
CONFIG_CC_ALIGN_JUMPS=0
# CONFIG_TINY_SHMEM is not set

#
# Loadable module support
#
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_OBSOLETE_MODPARM=y
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y

#
# Processor type and features
#
CONFIG_X86_PC=y
# CONFIG_X86_ELAN is not set
# CONFIG_X86_VOYAGER is not set
# CONFIG_X86_NUMAQ is not set
# CONFIG_X86_SUMMIT is not set
# CONFIG_X86_BIGSMP is not set
# CONFIG_X86_VISWS is not set
# CONFIG_X86_GENERICARCH is not set
# CONFIG_X86_ES7000 is not set
# CONFIG_M386 is not set
# CONFIG_M486 is not set
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
# CONFIG_M686 is not set
# CONFIG_MPENTIUMII is not set
# CONFIG_MPENTIUMIII is not set
# CONFIG_MPENTIUMM is not set
# CONFIG_MPENTIUM4 is not set
# CONFIG_MK6 is not set
CONFIG_MK7=y
# CONFIG_MK8 is not set
# CONFIG_MCRUSOE is not set
# CONFIG_MEFFICEON is not set
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP2 is not set
# CONFIG_MWINCHIP3D is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_X86_GENERIC is not set
CONFIG_X86_CMPXCHG=y
CONFIG_X86_XADD=y
CONFIG_X86_L1_CACHE_SHIFT=6
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_X86_USE_3DNOW=y
# CONFIG_HPET_TIMER is not set
# CONFIG_SMP is not set
CONFIG_PREEMPT=y
CONFIG_PREEMPT_BKL=y
CONFIG_X86_UP_APIC=y
CONFIG_X86_UP_IOAPIC=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
CONFIG_X86_TSC=y
CONFIG_X86_MCE=y
CONFIG_X86_MCE_NONFATAL=y
# CONFIG_X86_MCE_P4THERMAL is not set
# CONFIG_TOSHIBA is not set
# CONFIG_I8K is not set
CONFIG_MICROCODE=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y

#
# Firmware Drivers
#
# CONFIG_EDD is not set
CONFIG_NOHIGHMEM=y
# CONFIG_HIGHMEM4G is not set
# CONFIG_HIGHMEM64G is not set
CONFIG_1GLOWMEM=y
# CONFIG_MATH_EMULATION is not set
CONFIG_MTRR=y
# CONFIG_EFI is not set
CONFIG_HAVE_DEC_LOCK=y
CONFIG_REGPARM=y

#
# Power management options (ACPI, APM)
#
CONFIG_PM=y
# CONFIG_PM_DEBUG is not set
# CONFIG_SOFTWARE_SUSPEND is not set

#
# ACPI (Advanced Configuration and Power Interface) Support
#
CONFIG_ACPI=y
CONFIG_ACPI_BOOT=y
CONFIG_ACPI_INTERPRETER=y
CONFIG_ACPI_SLEEP=y
CONFIG_ACPI_SLEEP_PROC_FS=y
CONFIG_ACPI_AC=y
CONFIG_ACPI_BATTERY=y
CONFIG_ACPI_BUTTON=y
CONFIG_ACPI_VIDEO=y
CONFIG_ACPI_FAN=y
CONFIG_ACPI_PROCESSOR=y
CONFIG_ACPI_THERMAL=y
# CONFIG_ACPI_ASUS is not set
# CONFIG_ACPI_IBM is not set
# CONFIG_ACPI_TOSHIBA is not set
# CONFIG_ACPI_CUSTOM_DSDT is not set
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
CONFIG_ACPI_BUS=y
CONFIG_ACPI_EC=y
CONFIG_ACPI_POWER=y
CONFIG_ACPI_PCI=y
CONFIG_ACPI_SYSTEM=y
# CONFIG_X86_PM_TIMER is not set

#
# APM (Advanced Power Management) BIOS Support
#
# CONFIG_APM is not set

#
# CPU Frequency scaling
#
CONFIG_CPU_FREQ=y
# CONFIG_CPU_FREQ_DEBUG is not set
# CONFIG_CPU_FREQ_PROC_INTF is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
# CONFIG_CPU_FREQ_24_API is not set
# CONFIG_CPU_FREQ_GOV_ONDEMAND is not set
CONFIG_CPU_FREQ_TABLE=y

#
# CPUFreq processor drivers
#
# CONFIG_X86_ACPI_CPUFREQ is not set
# CONFIG_X86_POWERNOW_K6 is not set
# CONFIG_X86_POWERNOW_K7 is not set
# CONFIG_X86_POWERNOW_K8 is not set
# CONFIG_X86_GX_SUSPMOD is not set
# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
# CONFIG_X86_SPEEDSTEP_ICH is not set
# CONFIG_X86_SPEEDSTEP_SMI is not set
# CONFIG_X86_P4_CLOCKMOD is not set
CONFIG_X86_CPUFREQ_NFORCE2=y
# CONFIG_X86_LONGRUN is not set
# CONFIG_X86_LONGHAUL is not set

#
# shared options
#

#
# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
#
CONFIG_PCI=y
# CONFIG_PCI_GOBIOS is not set
# CONFIG_PCI_GOMMCONFIG is not set
CONFIG_PCI_GODIRECT=y
# CONFIG_PCI_GOANY is not set
CONFIG_PCI_DIRECT=y
# CONFIG_PCI_MSI is not set
CONFIG_PCI_LEGACY_PROC=y
CONFIG_PCI_NAMES=y
CONFIG_ISA=y
# CONFIG_EISA is not set
# CONFIG_MCA is not set
# CONFIG_SCx200 is not set

#
# PCCARD (PCMCIA/CardBus) support
#
# CONFIG_PCCARD is not set

#
# PC-card bridges
#
CONFIG_PCMCIA_PROBE=y

#
# PCI Hotplug Support
#
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_FAKE=y
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
# CONFIG_HOTPLUG_PCI_CPCI is not set
# CONFIG_HOTPLUG_PCI_PCIE is not set
# CONFIG_HOTPLUG_PCI_SHPC is not set

#
# Executable file formats
#
CONFIG_BINFMT_ELF=y
# CONFIG_BINFMT_AOUT is not set
CONFIG_BINFMT_MISC=y

#
# Device Drivers
#

#
# Generic Driver Options
#
# CONFIG_STANDALONE is not set
CONFIG_PREVENT_FIRMWARE_BUILD=y
# CONFIG_FW_LOADER is not set
# CONFIG_DEBUG_DRIVER is not set

#
# Memory Technology Devices (MTD)
#
# CONFIG_MTD is not set

#
# Parallel port support
#
# CONFIG_PARPORT is not set

#
# Plug and Play support
#
CONFIG_PNP=y
# CONFIG_PNP_DEBUG is not set

#
# Protocols
#
CONFIG_ISAPNP=y
CONFIG_PNPBIOS=y
# CONFIG_PNPBIOS_PROC_FS is not set
CONFIG_PNPACPI=y

#
# Block devices
#
CONFIG_BLK_DEV_FD=y
# CONFIG_BLK_DEV_XD is not set
# CONFIG_BLK_CPQ_DA is not set
# CONFIG_BLK_CPQ_CISS_DA is not set
# CONFIG_BLK_DEV_DAC960 is not set
# CONFIG_BLK_DEV_UMEM is not set
CONFIG_BLK_DEV_LOOP=y
# CONFIG_BLK_DEV_CRYPTOLOOP is not set
# CONFIG_BLK_DEV_NBD is not set
# CONFIG_BLK_DEV_SX8 is not set
# CONFIG_BLK_DEV_UB is not set
# CONFIG_BLK_DEV_RAM is not set
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_INITRAMFS_SOURCE=""
CONFIG_LBD=y
# CONFIG_CDROM_PKTCDVD is not set

#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y

#
# ATA/ATAPI/MFM/RLL support
#
CONFIG_IDE=y
CONFIG_BLK_DEV_IDE=y

#
# Please see Documentation/ide.txt for help/info on IDE drives
#
# CONFIG_BLK_DEV_IDE_SATA is not set
# CONFIG_BLK_DEV_HD_IDE is not set
CONFIG_BLK_DEV_IDEDISK=y
# CONFIG_IDEDISK_MULTI_MODE is not set
CONFIG_BLK_DEV_IDECD=y
# CONFIG_BLK_DEV_IDETAPE is not set
CONFIG_BLK_DEV_IDEFLOPPY=y
# CONFIG_BLK_DEV_IDESCSI is not set
# CONFIG_IDE_TASK_IOCTL is not set

#
# IDE chipset support/bugfixes
#
# CONFIG_IDE_GENERIC is not set
# CONFIG_BLK_DEV_CMD640 is not set
# CONFIG_BLK_DEV_IDEPNP is not set
CONFIG_BLK_DEV_IDEPCI=y
CONFIG_IDEPCI_SHARE_IRQ=y
# CONFIG_BLK_DEV_OFFBOARD is not set
# CONFIG_BLK_DEV_GENERIC is not set
# CONFIG_BLK_DEV_OPTI621 is not set
# CONFIG_BLK_DEV_RZ1000 is not set
CONFIG_BLK_DEV_IDEDMA_PCI=y
# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
CONFIG_IDEDMA_PCI_AUTO=y
# CONFIG_IDEDMA_ONLYDISK is not set
# CONFIG_BLK_DEV_AEC62XX is not set
# CONFIG_BLK_DEV_ALI15X3 is not set
CONFIG_BLK_DEV_AMD74XX=y
# CONFIG_BLK_DEV_ATIIXP is not set
# CONFIG_BLK_DEV_CMD64X is not set
# CONFIG_BLK_DEV_TRIFLEX is not set
# CONFIG_BLK_DEV_CY82C693 is not set
# CONFIG_BLK_DEV_CS5520 is not set
# CONFIG_BLK_DEV_CS5530 is not set
# CONFIG_BLK_DEV_HPT34X is not set
# CONFIG_BLK_DEV_HPT366 is not set
# CONFIG_BLK_DEV_SC1200 is not set
# CONFIG_BLK_DEV_PIIX is not set
# CONFIG_BLK_DEV_NS87415 is not set
# CONFIG_BLK_DEV_PDC202XX_OLD is not set
# CONFIG_BLK_DEV_PDC202XX_NEW is not set
# CONFIG_BLK_DEV_SVWKS is not set
# CONFIG_BLK_DEV_SIIMAGE is not set
# CONFIG_BLK_DEV_SIS5513 is not set
# CONFIG_BLK_DEV_SLC90E66 is not set
# CONFIG_BLK_DEV_TRM290 is not set
# CONFIG_BLK_DEV_VIA82CXXX is not set
# CONFIG_IDE_ARM is not set
# CONFIG_IDE_CHIPSETS is not set
CONFIG_BLK_DEV_IDEDMA=y
# CONFIG_IDEDMA_IVB is not set
CONFIG_IDEDMA_AUTO=y
# CONFIG_BLK_DEV_HD is not set

#
# SCSI device support
#
CONFIG_SCSI=y
CONFIG_SCSI_PROC_FS=y

#
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=y
# CONFIG_CHR_DEV_ST is not set
# CONFIG_CHR_DEV_OSST is not set
CONFIG_BLK_DEV_SR=y
CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y

#
# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
#
CONFIG_SCSI_MULTI_LUN=y
CONFIG_SCSI_CONSTANTS=y
# CONFIG_SCSI_LOGGING is not set

#
# SCSI Transport Attributes
#
# CONFIG_SCSI_SPI_ATTRS is not set
# CONFIG_SCSI_FC_ATTRS is not set

#
# SCSI low-level drivers
#
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_7000FASST is not set
# CONFIG_SCSI_ACARD is not set
# CONFIG_SCSI_AHA152X is not set
# CONFIG_SCSI_AHA1542 is not set
# CONFIG_SCSI_AACRAID is not set
# CONFIG_SCSI_AIC7XXX is not set
# CONFIG_SCSI_AIC7XXX_OLD is not set
# CONFIG_SCSI_AIC79XX is not set
# CONFIG_SCSI_DPT_I2O is not set
# CONFIG_SCSI_ADVANSYS is not set
# CONFIG_SCSI_IN2000 is not set
# CONFIG_MEGARAID_NEWGEN is not set
# CONFIG_MEGARAID_LEGACY is not set
CONFIG_SCSI_SATA=y
# CONFIG_SCSI_SATA_AHCI is not set
# CONFIG_SCSI_SATA_SVW is not set
# CONFIG_SCSI_ATA_PIIX is not set
# CONFIG_SCSI_SATA_NV is not set
# CONFIG_SCSI_SATA_PROMISE is not set
# CONFIG_SCSI_SATA_SX4 is not set
CONFIG_SCSI_SATA_SIL=y
# CONFIG_SCSI_SATA_SIS is not set
# CONFIG_SCSI_SATA_ULI is not set
# CONFIG_SCSI_SATA_VIA is not set
# CONFIG_SCSI_SATA_VITESSE is not set
# CONFIG_SCSI_BUSLOGIC is not set
# CONFIG_SCSI_CPQFCTS is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_DTC3280 is not set
# CONFIG_SCSI_EATA is not set
# CONFIG_SCSI_EATA_PIO is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_GDTH is not set
# CONFIG_SCSI_GENERIC_NCR5380 is not set
# CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set
# CONFIG_SCSI_IPS is not set
# CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_NCR53C406A is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
# CONFIG_SCSI_IPR is not set
# CONFIG_SCSI_PAS16 is not set
# CONFIG_SCSI_PCI2000 is not set
# CONFIG_SCSI_PCI2220I is not set
# CONFIG_SCSI_PSI240I is not set
# CONFIG_SCSI_QLOGIC_FAS is not set
# CONFIG_SCSI_QLOGIC_ISP is not set
# CONFIG_SCSI_QLOGIC_FC is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
CONFIG_SCSI_QLA2XXX=y
# CONFIG_SCSI_QLA21XX is not set
# CONFIG_SCSI_QLA22XX is not set
# CONFIG_SCSI_QLA2300 is not set
# CONFIG_SCSI_QLA2322 is not set
# CONFIG_SCSI_QLA6312 is not set
# CONFIG_SCSI_QLA6322 is not set
# CONFIG_SCSI_SEAGATE is not set
# CONFIG_SCSI_SYM53C416 is not set
# CONFIG_SCSI_DC395x is not set
# CONFIG_SCSI_DC390T is not set
# CONFIG_SCSI_T128 is not set
# CONFIG_SCSI_U14_34F is not set
# CONFIG_SCSI_ULTRASTOR is not set
# CONFIG_SCSI_NSP32 is not set
# CONFIG_SCSI_DEBUG is not set

#
# Old CD-ROM drivers (not SCSI, not IDE)
#
# CONFIG_CD_NO_IDESCSI is not set

#
# Multi-device support (RAID and LVM)
#
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
# CONFIG_MD_LINEAR is not set
CONFIG_MD_RAID0=y
# CONFIG_MD_RAID1 is not set
# CONFIG_MD_RAID10 is not set
# CONFIG_MD_RAID5 is not set
# CONFIG_MD_RAID6 is not set
# CONFIG_MD_MULTIPATH is not set
# CONFIG_MD_FAULTY is not set
CONFIG_BLK_DEV_DM=y
CONFIG_DM_CRYPT=y
# CONFIG_DM_SNAPSHOT is not set
# CONFIG_DM_MIRROR is not set
# CONFIG_DM_ZERO is not set

#
# Fusion MPT device support
#
# CONFIG_FUSION is not set

#
# IEEE 1394 (FireWire) support
#
# CONFIG_IEEE1394 is not set

#
# I2O device support
#
CONFIG_I2O=y
CONFIG_I2O_CONFIG=y
# CONFIG_I2O_BLOCK is not set
# CONFIG_I2O_SCSI is not set
CONFIG_I2O_PROC=y

#
# Networking support
#
CONFIG_NET=y

#
# Networking options
#
CONFIG_PACKET=y
# CONFIG_PACKET_MMAP is not set
# CONFIG_NETLINK_DEV is not set
CONFIG_UNIX=y
# CONFIG_NET_KEY is not set
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
# CONFIG_IP_ADVANCED_ROUTER is not set
# CONFIG_IP_PNP is not set
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE is not set
# CONFIG_IP_MROUTE is not set
# CONFIG_ARPD is not set
# CONFIG_SYN_COOKIES is not set
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
# CONFIG_INET_TUNNEL is not set
CONFIG_IP_TCPDIAG=y
# CONFIG_IP_TCPDIAG_IPV6 is not set
# CONFIG_IPV6 is not set
# CONFIG_NETFILTER is not set

#
# SCTP Configuration (EXPERIMENTAL)
#
# CONFIG_IP_SCTP is not set
# CONFIG_ATM is not set
# CONFIG_BRIDGE is not set
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
# CONFIG_LLC2 is not set
# CONFIG_IPX is not set
# CONFIG_ATALK is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
# CONFIG_NET_DIVERT is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set

#
# QoS and/or fair queueing
#
# CONFIG_NET_SCHED is not set
# CONFIG_NET_CLS_ROUTE is not set

#
# Network testing
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_NETPOLL is not set
# CONFIG_NET_POLL_CONTROLLER is not set
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
CONFIG_NETDEVICES=y
CONFIG_DUMMY=y
# CONFIG_BONDING is not set
# CONFIG_EQUALIZER is not set
# CONFIG_TUN is not set
# CONFIG_NET_SB1000 is not set

#
# ARCnet devices
#
# CONFIG_ARCNET is not set

#
# Ethernet (10 or 100Mbit)
#
CONFIG_NET_ETHERNET=y
# CONFIG_MII is not set
# CONFIG_HAPPYMEAL is not set
# CONFIG_SUNGEM is not set
# CONFIG_NET_VENDOR_3COM is not set
# CONFIG_LANCE is not set
# CONFIG_NET_VENDOR_SMC is not set
# CONFIG_NET_VENDOR_RACAL is not set

#
# Tulip family network device support
#
# CONFIG_NET_TULIP is not set
# CONFIG_AT1700 is not set
# CONFIG_DEPCA is not set
# CONFIG_HP100 is not set
# CONFIG_NET_ISA is not set
CONFIG_NET_PCI=y
# CONFIG_PCNET32 is not set
# CONFIG_AMD8111_ETH is not set
# CONFIG_ADAPTEC_STARFIRE is not set
# CONFIG_AC3200 is not set
# CONFIG_APRICOT is not set
# CONFIG_B44 is not set
CONFIG_FORCEDETH=y
# CONFIG_CS89x0 is not set
# CONFIG_DGRS is not set
# CONFIG_EEPRO100 is not set
# CONFIG_E100 is not set
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
# CONFIG_NE2K_PCI is not set
# CONFIG_8139CP is not set
# CONFIG_8139TOO is not set
# CONFIG_SIS900 is not set
# CONFIG_EPIC100 is not set
# CONFIG_SUNDANCE is not set
# CONFIG_TLAN is not set
# CONFIG_VIA_RHINE is not set
# CONFIG_NET_POCKET is not set

#
# Ethernet (1000 Mbit)
#
# CONFIG_ACENIC is not set
# CONFIG_DL2K is not set
# CONFIG_E1000 is not set
# CONFIG_NS83820 is not set
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
# CONFIG_R8169 is not set
# CONFIG_SK98LIN is not set
# CONFIG_VIA_VELOCITY is not set
# CONFIG_TIGON3 is not set

#
# Ethernet (10000 Mbit)
#
# CONFIG_IXGB is not set
# CONFIG_S2IO is not set

#
# Token Ring devices
#
# CONFIG_TR is not set

#
# Wireless LAN (non-hamradio)
#
# CONFIG_NET_RADIO is not set

#
# Wan interfaces
#
# CONFIG_WAN is not set
# CONFIG_FDDI is not set
# CONFIG_HIPPI is not set
# CONFIG_PPP is not set
# CONFIG_SLIP is not set
# CONFIG_NET_FC is not set
# CONFIG_SHAPER is not set
# CONFIG_NETCONSOLE is not set

#
# ISDN subsystem
#
# CONFIG_ISDN is not set

#
# Telephony Support
#
# CONFIG_PHONE is not set

#
# Input device support
#
CONFIG_INPUT=y

#
# Userland interfaces
#
CONFIG_INPUT_MOUSEDEV=y
CONFIG_INPUT_MOUSEDEV_PSAUX=y
CONFIG_INPUT_MOUSEDEV_SCREEN_X=1280
CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1024
# CONFIG_INPUT_JOYDEV is not set
# CONFIG_INPUT_TSDEV is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_EVBUG is not set

#
# Input I/O drivers
#
# CONFIG_GAMEPORT is not set
CONFIG_SOUND_GAMEPORT=y
CONFIG_SERIO=y
CONFIG_SERIO_I8042=y
# CONFIG_SERIO_SERPORT is not set
# CONFIG_SERIO_CT82C710 is not set
# CONFIG_SERIO_PCIPS2 is not set
# CONFIG_SERIO_RAW is not set

#
# Input Device Drivers
#
CONFIG_INPUT_KEYBOARD=y
CONFIG_KEYBOARD_ATKBD=y
# CONFIG_KEYBOARD_SUNKBD is not set
# CONFIG_KEYBOARD_LKKBD is not set
# CONFIG_KEYBOARD_XTKBD is not set
# CONFIG_KEYBOARD_NEWTON is not set
CONFIG_INPUT_MOUSE=y
CONFIG_MOUSE_PS2=y
# CONFIG_MOUSE_SERIAL is not set
# CONFIG_MOUSE_INPORT is not set
# CONFIG_MOUSE_LOGIBM is not set
# CONFIG_MOUSE_PC110PAD is not set
# CONFIG_MOUSE_VSXXXAA is not set
# CONFIG_INPUT_JOYSTICK is not set
# CONFIG_INPUT_TOUCHSCREEN is not set
# CONFIG_INPUT_MISC is not set

#
# Character devices
#
CONFIG_VT=y
CONFIG_VT_CONSOLE=y
CONFIG_HW_CONSOLE=y
# CONFIG_SERIAL_NONSTANDARD is not set

#
# Serial drivers
#
# CONFIG_SERIAL_8250 is not set

#
# Non-8250 serial port support
#
CONFIG_UNIX98_PTYS=y
CONFIG_LEGACY_PTYS=y
CONFIG_LEGACY_PTY_COUNT=256

#
# IPMI
#
# CONFIG_IPMI_HANDLER is not set

#
# Watchdog Cards
#
# CONFIG_WATCHDOG is not set
# CONFIG_HW_RANDOM is not set
CONFIG_NVRAM=y
CONFIG_RTC=y
# CONFIG_DTLK is not set
# CONFIG_R3964 is not set
# CONFIG_APPLICOM is not set
# CONFIG_SONYPI is not set

#
# Ftape, the floppy tape device driver
#
# CONFIG_FTAPE is not set
# CONFIG_AGP is not set
# CONFIG_DRM is not set
# CONFIG_MWAVE is not set
# CONFIG_RAW_DRIVER is not set
# CONFIG_HPET is not set
# CONFIG_HANGCHECK_TIMER is not set

#
# I2C support
#
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y

#
# I2C Algorithms
#
CONFIG_I2C_ALGOBIT=y
CONFIG_I2C_ALGOPCF=y
# CONFIG_I2C_ALGOPCA is not set

#
# I2C Hardware Bus support
#
# CONFIG_I2C_ALI1535 is not set
# CONFIG_I2C_ALI1563 is not set
# CONFIG_I2C_ALI15X3 is not set
# CONFIG_I2C_AMD756 is not set
# CONFIG_I2C_AMD8111 is not set
# CONFIG_I2C_ELEKTOR is not set
# CONFIG_I2C_I801 is not set
# CONFIG_I2C_I810 is not set
CONFIG_I2C_ISA=y
CONFIG_I2C_NFORCE2=y
# CONFIG_I2C_PARPORT_LIGHT is not set
# CONFIG_I2C_PIIX4 is not set
# CONFIG_I2C_PROSAVAGE is not set
# CONFIG_I2C_SAVAGE4 is not set
# CONFIG_SCx200_ACB is not set
# CONFIG_I2C_SIS5595 is not set
# CONFIG_I2C_SIS630 is not set
# CONFIG_I2C_SIS96X is not set
# CONFIG_I2C_STUB is not set
# CONFIG_I2C_VIA is not set
# CONFIG_I2C_VIAPRO is not set
# CONFIG_I2C_VOODOO3 is not set
# CONFIG_I2C_PCA_ISA is not set

#
# Hardware Sensors Chip support
#
CONFIG_I2C_SENSOR=y
# CONFIG_SENSORS_ADM1021 is not set
# CONFIG_SENSORS_ADM1025 is not set
# CONFIG_SENSORS_ADM1026 is not set
# CONFIG_SENSORS_ADM1031 is not set
# CONFIG_SENSORS_ASB100 is not set
# CONFIG_SENSORS_DS1621 is not set
# CONFIG_SENSORS_FSCHER is not set
# CONFIG_SENSORS_GL518SM is not set
# CONFIG_SENSORS_IT87 is not set
# CONFIG_SENSORS_LM63 is not set
# CONFIG_SENSORS_LM75 is not set
# CONFIG_SENSORS_LM77 is not set
# CONFIG_SENSORS_LM78 is not set
# CONFIG_SENSORS_LM80 is not set
# CONFIG_SENSORS_LM83 is not set
# CONFIG_SENSORS_LM85 is not set
# CONFIG_SENSORS_LM87 is not set
# CONFIG_SENSORS_LM90 is not set
# CONFIG_SENSORS_MAX1619 is not set
# CONFIG_SENSORS_PC87360 is not set
# CONFIG_SENSORS_SMSC47M1 is not set
# CONFIG_SENSORS_VIA686A is not set
# CONFIG_SENSORS_W83781D is not set
# CONFIG_SENSORS_W83L785TS is not set
CONFIG_SENSORS_W83627HF=y

#
# Other I2C Chip support
#
CONFIG_SENSORS_EEPROM=y
# CONFIG_SENSORS_PCF8574 is not set
# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_SENSORS_RTC8564 is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
# CONFIG_I2C_DEBUG_CHIP is not set

#
# Dallas's 1-wire bus
#
# CONFIG_W1 is not set

#
# Misc devices
#
# CONFIG_IBM_ASM is not set

#
# Multimedia devices
#
# CONFIG_VIDEO_DEV is not set

#
# Digital Video Broadcasting Devices
#
CONFIG_DVB=y
CONFIG_DVB_CORE=y

#
# DVB-S (satellite) frontends
#
# CONFIG_DVB_STV0299 is not set
# CONFIG_DVB_CX24110 is not set
# CONFIG_DVB_GRUNDIG_29504_491 is not set
CONFIG_DVB_MT312=y
# CONFIG_DVB_VES1X93 is not set

#
# DVB-T (terrestrial) frontends
#
# CONFIG_DVB_SP887X is not set
# CONFIG_DVB_ALPS_TDLB7 is not set
# CONFIG_DVB_ALPS_TDMB7 is not set
# CONFIG_DVB_CX22702 is not set
# CONFIG_DVB_GRUNDIG_29504_401 is not set
# CONFIG_DVB_TDA1004X is not set
# CONFIG_DVB_NXT6000 is not set
# CONFIG_DVB_MT352 is not set
# CONFIG_DVB_DIB3000MB is not set

#
# DVB-C (cable) frontends
#
# CONFIG_DVB_ATMEL_AT76C651 is not set
# CONFIG_DVB_VES1820 is not set

#
# Misc. Frontend Modules
#

#
# Supported SAA7146 based PCI Adapters
#
# CONFIG_DVB_AV7110 is not set
# CONFIG_DVB_BUDGET is not set
# CONFIG_DVB_BUDGET_CI is not set
# CONFIG_DVB_BUDGET_AV is not set

#
# Supported USB Adapters
#
# CONFIG_DVB_TTUSB_BUDGET is not set
# CONFIG_DVB_TTUSB_DEC is not set
# CONFIG_DVB_DIBUSB is not set
# CONFIG_DVB_CINERGYT2 is not set

#
# Supported FlexCopII (B2C2) Adapters
#
CONFIG_DVB_B2C2_SKYSTAR=y

#
# Supported BT878 Adapters
#

#
# Graphics support
#
CONFIG_FB=y
# CONFIG_FB_MODE_HELPERS is not set
# CONFIG_FB_TILEBLITTING is not set
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_PM2 is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_VGA16 is not set
CONFIG_FB_VESA=y
CONFIG_VIDEO_SELECT=y
# CONFIG_FB_HGA is not set
# CONFIG_FB_RIVA is not set
# CONFIG_FB_I810 is not set
# CONFIG_FB_INTEL is not set
# CONFIG_FB_MATROX is not set
# CONFIG_FB_RADEON_OLD is not set
# CONFIG_FB_RADEON is not set
# CONFIG_FB_ATY128 is not set
# CONFIG_FB_ATY is not set
# CONFIG_FB_SAVAGE is not set
# CONFIG_FB_SIS is not set
# CONFIG_FB_NEOMAGIC is not set
# CONFIG_FB_KYRO is not set
# CONFIG_FB_3DFX is not set
# CONFIG_FB_VOODOO1 is not set
# CONFIG_FB_TRIDENT is not set
# CONFIG_FB_PM3 is not set
# CONFIG_FB_VIRTUAL is not set

#
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
# CONFIG_MDA_CONSOLE is not set
CONFIG_DUMMY_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FONTS=y
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
# CONFIG_FONT_6x11 is not set
# CONFIG_FONT_PEARL_8x8 is not set
# CONFIG_FONT_ACORN_8x8 is not set
# CONFIG_FONT_MINI_4x6 is not set
# CONFIG_FONT_SUN8x16 is not set
# CONFIG_FONT_SUN12x22 is not set

#
# Logo configuration
#
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_LOGO_LINUX_CLUT224=y

#
# Sound
#
CONFIG_SOUND=y

#
# Advanced Linux Sound Architecture
#
CONFIG_SND=y
CONFIG_SND_TIMER=y
CONFIG_SND_PCM=y
CONFIG_SND_SEQUENCER=y
# CONFIG_SND_SEQ_DUMMY is not set
CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=y
CONFIG_SND_PCM_OSS=y
CONFIG_SND_SEQUENCER_OSS=y
CONFIG_SND_RTCTIMER=y
# CONFIG_SND_VERBOSE_PRINTK is not set
# CONFIG_SND_DEBUG is not set

#
# Generic devices
#
# CONFIG_SND_DUMMY is not set
# CONFIG_SND_VIRMIDI is not set
# CONFIG_SND_MTPAV is not set
# CONFIG_SND_SERIAL_U16550 is not set
# CONFIG_SND_MPU401 is not set

#
# ISA devices
#
# CONFIG_SND_AD1816A is not set
# CONFIG_SND_AD1848 is not set
# CONFIG_SND_CS4231 is not set
# CONFIG_SND_CS4232 is not set
# CONFIG_SND_CS4236 is not set
# CONFIG_SND_ES968 is not set
# CONFIG_SND_ES1688 is not set
# CONFIG_SND_ES18XX is not set
# CONFIG_SND_GUSCLASSIC is not set
# CONFIG_SND_GUSEXTREME is not set
# CONFIG_SND_GUSMAX is not set
# CONFIG_SND_INTERWAVE is not set
# CONFIG_SND_INTERWAVE_STB is not set
# CONFIG_SND_OPTI92X_AD1848 is not set
# CONFIG_SND_OPTI92X_CS4231 is not set
# CONFIG_SND_OPTI93X is not set
# CONFIG_SND_SB8 is not set
# CONFIG_SND_SB16 is not set
# CONFIG_SND_SBAWE is not set
# CONFIG_SND_WAVEFRONT is not set
# CONFIG_SND_ALS100 is not set
# CONFIG_SND_AZT2320 is not set
# CONFIG_SND_CMI8330 is not set
# CONFIG_SND_DT019X is not set
# CONFIG_SND_OPL3SA2 is not set
# CONFIG_SND_SGALAXY is not set
# CONFIG_SND_SSCAPE is not set

#
# PCI devices
#
CONFIG_SND_AC97_CODEC=y
# CONFIG_SND_ALI5451 is not set
# CONFIG_SND_ATIIXP is not set
# CONFIG_SND_ATIIXP_MODEM is not set
# CONFIG_SND_AU8810 is not set
# CONFIG_SND_AU8820 is not set
# CONFIG_SND_AU8830 is not set
# CONFIG_SND_AZT3328 is not set
# CONFIG_SND_BT87X is not set
# CONFIG_SND_CS46XX is not set
# CONFIG_SND_CS4281 is not set
# CONFIG_SND_EMU10K1 is not set
# CONFIG_SND_KORG1212 is not set
# CONFIG_SND_MIXART is not set
# CONFIG_SND_NM256 is not set
# CONFIG_SND_RME32 is not set
# CONFIG_SND_RME96 is not set
# CONFIG_SND_RME9652 is not set
# CONFIG_SND_HDSP is not set
# CONFIG_SND_TRIDENT is not set
# CONFIG_SND_YMFPCI is not set
# CONFIG_SND_ALS4000 is not set
# CONFIG_SND_CMIPCI is not set
# CONFIG_SND_ENS1370 is not set
# CONFIG_SND_ENS1371 is not set
# CONFIG_SND_ES1938 is not set
# CONFIG_SND_ES1968 is not set
# CONFIG_SND_MAESTRO3 is not set
# CONFIG_SND_FM801 is not set
# CONFIG_SND_ICE1712 is not set
# CONFIG_SND_ICE1724 is not set
CONFIG_SND_INTEL8X0=y
# CONFIG_SND_INTEL8X0M is not set
# CONFIG_SND_SONICVIBES is not set
# CONFIG_SND_VIA82XX is not set
# CONFIG_SND_VX222 is not set

#
# USB devices
#
# CONFIG_SND_USB_AUDIO is not set
# CONFIG_SND_USB_USX2Y is not set

#
# Open Sound System
#
# CONFIG_SOUND_PRIME is not set

#
# USB support
#
CONFIG_USB=y
# CONFIG_USB_DEBUG is not set

#
# Miscellaneous USB options
#
CONFIG_USB_DEVICEFS=y
CONFIG_USB_BANDWIDTH=y
# CONFIG_USB_DYNAMIC_MINORS is not set
CONFIG_USB_SUSPEND=y
# CONFIG_USB_OTG is not set
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y

#
# USB Host Controller Drivers
#
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_SPLIT_ISO=y
CONFIG_USB_EHCI_ROOT_HUB_TT=y
CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_UHCI_HCD is not set

#
# USB Device Class drivers
#
# CONFIG_USB_AUDIO is not set
# CONFIG_USB_BLUETOOTH_TTY is not set
# CONFIG_USB_MIDI is not set
# CONFIG_USB_ACM is not set
CONFIG_USB_PRINTER=y

#
# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
#
CONFIG_USB_STORAGE=y
# CONFIG_USB_STORAGE_DEBUG is not set
# CONFIG_USB_STORAGE_RW_DETECT is not set
# CONFIG_USB_STORAGE_DATAFAB is not set
# CONFIG_USB_STORAGE_FREECOM is not set
# CONFIG_USB_STORAGE_ISD200 is not set
# CONFIG_USB_STORAGE_DPCM is not set
# CONFIG_USB_STORAGE_HP8200e is not set
# CONFIG_USB_STORAGE_SDDR09 is not set
# CONFIG_USB_STORAGE_SDDR55 is not set
# CONFIG_USB_STORAGE_JUMPSHOT is not set

#
# USB Input Devices
#
# CONFIG_USB_HID is not set

#
# USB HID Boot Protocol drivers
#
# CONFIG_USB_KBD is not set
# CONFIG_USB_MOUSE is not set
# CONFIG_USB_AIPTEK is not set
# CONFIG_USB_WACOM is not set
# CONFIG_USB_KBTAB is not set
# CONFIG_USB_POWERMATE is not set
# CONFIG_USB_MTOUCH is not set
# CONFIG_USB_EGALAX is not set
# CONFIG_USB_XPAD is not set
# CONFIG_USB_ATI_REMOTE is not set

#
# USB Imaging devices
#
# CONFIG_USB_MDC800 is not set
# CONFIG_USB_MICROTEK is not set
# CONFIG_USB_HPUSBSCSI is not set

#
# USB Multimedia devices
#
# CONFIG_USB_DABUSB is not set

#
# Video4Linux support is needed for USB Multimedia device support
#

#
# USB Network Adapters
#
# CONFIG_USB_CATC is not set
# CONFIG_USB_KAWETH is not set
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
# CONFIG_USB_USBNET is not set

#
# USB port drivers
#

#
# USB Serial Converter support
#
# CONFIG_USB_SERIAL is not set

#
# USB Miscellaneous drivers
#
# CONFIG_USB_EMI62 is not set
# CONFIG_USB_EMI26 is not set
# CONFIG_USB_TIGL is not set
# CONFIG_USB_AUERSWALD is not set
# CONFIG_USB_RIO500 is not set
# CONFIG_USB_LEGOTOWER is not set
# CONFIG_USB_LCD is not set
# CONFIG_USB_LED is not set
# CONFIG_USB_CYTHERM is not set
# CONFIG_USB_PHIDGETKIT is not set
# CONFIG_USB_PHIDGETSERVO is not set
# CONFIG_USB_TEST is not set

#
# USB ATM/DSL drivers
#

#
# USB Gadget Support
#
# CONFIG_USB_GADGET is not set

#
# File systems
#
CONFIG_EXT2_FS=y
# CONFIG_EXT2_FS_XATTR is not set
CONFIG_EXT3_FS=y
# CONFIG_EXT3_FS_XATTR is not set
CONFIG_JBD=y
# CONFIG_JBD_DEBUG is not set
CONFIG_REISERFS_FS=y
# CONFIG_REISERFS_CHECK is not set
# CONFIG_REISERFS_PROC_INFO is not set
# CONFIG_REISERFS_FS_XATTR is not set
# CONFIG_JFS_FS is not set
CONFIG_FS_POSIX_ACL=y
CONFIG_XFS_FS=y
# CONFIG_XFS_RT is not set
# CONFIG_XFS_QUOTA is not set
# CONFIG_XFS_SECURITY is not set
# CONFIG_XFS_POSIX_ACL is not set
# CONFIG_MINIX_FS is not set
# CONFIG_ROMFS_FS is not set
# CONFIG_QUOTA is not set
CONFIG_DNOTIFY=y
# CONFIG_AUTOFS_FS is not set
CONFIG_AUTOFS4_FS=y

#
# CD-ROM/DVD Filesystems
#
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
CONFIG_ZISOFS_FS=y
CONFIG_UDF_FS=y
CONFIG_UDF_NLS=y

#
# DOS/FAT/NT Filesystems
#
CONFIG_FAT_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_FAT_DEFAULT_CODEPAGE=437
CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-15"
CONFIG_NTFS_FS=y
# CONFIG_NTFS_DEBUG is not set
# CONFIG_NTFS_RW is not set

#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_SYSFS=y
# CONFIG_DEVFS_FS is not set
# CONFIG_DEVPTS_FS_XATTR is not set
CONFIG_TMPFS=y
# CONFIG_TMPFS_XATTR is not set
# CONFIG_HUGETLBFS is not set
# CONFIG_HUGETLB_PAGE is not set
CONFIG_RAMFS=y

#
# Miscellaneous filesystems
#
# CONFIG_ADFS_FS is not set
# CONFIG_AFFS_FS is not set
# CONFIG_HFS_FS is not set
# CONFIG_HFSPLUS_FS is not set
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
# CONFIG_CRAMFS is not set
# CONFIG_VXFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set

#
# Network File Systems
#
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_NFS_DIRECTIO=y
CONFIG_NFSD=y
CONFIG_NFSD_V3=y
CONFIG_NFSD_V4=y
CONFIG_NFSD_TCP=y
CONFIG_LOCKD=y
CONFIG_LOCKD_V4=y
CONFIG_EXPORTFS=y
CONFIG_SUNRPC=y
CONFIG_SUNRPC_GSS=y
CONFIG_RPCSEC_GSS_KRB5=y
# CONFIG_RPCSEC_GSS_SPKM3 is not set
CONFIG_SMB_FS=y
# CONFIG_SMB_NLS_DEFAULT is not set
# CONFIG_CIFS is not set
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set

#
# Partition Types
#
# CONFIG_PARTITION_ADVANCED is not set
CONFIG_MSDOS_PARTITION=y

#
# Native Language Support
#
CONFIG_NLS=y
CONFIG_NLS_DEFAULT="iso8859-1"
CONFIG_NLS_CODEPAGE_437=y
# CONFIG_NLS_CODEPAGE_737 is not set
# CONFIG_NLS_CODEPAGE_775 is not set
CONFIG_NLS_CODEPAGE_850=y
# CONFIG_NLS_CODEPAGE_852 is not set
# CONFIG_NLS_CODEPAGE_855 is not set
# CONFIG_NLS_CODEPAGE_857 is not set
# CONFIG_NLS_CODEPAGE_860 is not set
# CONFIG_NLS_CODEPAGE_861 is not set
# CONFIG_NLS_CODEPAGE_862 is not set
# CONFIG_NLS_CODEPAGE_863 is not set
# CONFIG_NLS_CODEPAGE_864 is not set
# CONFIG_NLS_CODEPAGE_865 is not set
# CONFIG_NLS_CODEPAGE_866 is not set
# CONFIG_NLS_CODEPAGE_869 is not set
# CONFIG_NLS_CODEPAGE_936 is not set
# CONFIG_NLS_CODEPAGE_950 is not set
# CONFIG_NLS_CODEPAGE_932 is not set
# CONFIG_NLS_CODEPAGE_949 is not set
# CONFIG_NLS_CODEPAGE_874 is not set
# CONFIG_NLS_ISO8859_8 is not set
# CONFIG_NLS_CODEPAGE_1250 is not set
# CONFIG_NLS_CODEPAGE_1251 is not set
# CONFIG_NLS_ASCII is not set
CONFIG_NLS_ISO8859_1=y
# CONFIG_NLS_ISO8859_2 is not set
# CONFIG_NLS_ISO8859_3 is not set
# CONFIG_NLS_ISO8859_4 is not set
# CONFIG_NLS_ISO8859_5 is not set
# CONFIG_NLS_ISO8859_6 is not set
# CONFIG_NLS_ISO8859_7 is not set
# CONFIG_NLS_ISO8859_9 is not set
# CONFIG_NLS_ISO8859_13 is not set
# CONFIG_NLS_ISO8859_14 is not set
CONFIG_NLS_ISO8859_15=y
# CONFIG_NLS_KOI8_R is not set
# CONFIG_NLS_KOI8_U is not set
CONFIG_NLS_UTF8=y

#
# Profiling support
#
CONFIG_PROFILING=y
CONFIG_OPROFILE=m

#
# Kernel hacking
#
CONFIG_DEBUG_KERNEL=y
CONFIG_MAGIC_SYSRQ=y
# CONFIG_SCHEDSTATS is not set
# CONFIG_DEBUG_SLAB is not set
# CONFIG_DEBUG_PREEMPT is not set
# CONFIG_DEBUG_SPINLOCK is not set
# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_INFO is not set
# CONFIG_FRAME_POINTER is not set
CONFIG_EARLY_PRINTK=y
# CONFIG_DEBUG_STACKOVERFLOW is not set
# CONFIG_KPROBES is not set
# CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_4KSTACKS is not set
CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y

#
# Security options
#
# CONFIG_KEYS is not set
# CONFIG_SECURITY is not set

#
# Cryptographic options
#
CONFIG_CRYPTO=y
# CONFIG_CRYPTO_HMAC is not set
# CONFIG_CRYPTO_NULL is not set
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_SHA1 is not set
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_WP512 is not set
CONFIG_CRYPTO_DES=y
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_TWOFISH is not set
# CONFIG_CRYPTO_SERPENT is not set
# CONFIG_CRYPTO_AES_586 is not set
# CONFIG_CRYPTO_CAST5 is not set
# CONFIG_CRYPTO_CAST6 is not set
# CONFIG_CRYPTO_TEA is not set
# CONFIG_CRYPTO_ARC4 is not set
# CONFIG_CRYPTO_KHAZAD is not set
# CONFIG_CRYPTO_ANUBIS is not set
# CONFIG_CRYPTO_DEFLATE is not set
# CONFIG_CRYPTO_MICHAEL_MIC is not set
# CONFIG_CRYPTO_CRC32C is not set
# CONFIG_CRYPTO_TEST is not set

#
# Library routines
#
# CONFIG_CRC_CCITT is not set
CONFIG_CRC32=y
# CONFIG_LIBCRC32C is not set
CONFIG_ZLIB_INFLATE=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_PC=y

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06  9:31 ` Prakash K. Cheemplavam
@ 2004-12-06  9:35   ` Jens Axboe
  2004-12-06 11:48     ` Ed Tomlinson
  2004-12-06 12:31     ` Prakash K. Cheemplavam
  0 siblings, 2 replies; 30+ messages in thread
From: Jens Axboe @ 2004-12-06  9:35 UTC (permalink / raw)
  To: Prakash K. Cheemplavam; +Cc: Linux Kernel, Andrew Morton

On Mon, Dec 06 2004, Prakash K. Cheemplavam wrote:
> Hi,
> 
> this one crapped out on me, while having heavy disk activity. (updating 
>  gentoo portage tree - rebuilding metadata of it). Unfortunately I 
> couldn't save the oops, as I had no hd access anymore and X would freeze 
> a little later...(and I don't want to risk my data a second time...)

Did you save anything at all? Just the function of the EIP would be
better than nothing.
> 
> I think it had to do with preempt, or even preempt big kernel lock, as I 
> could read something about it. I applied your patch to 2.6.10_rc3-ck1. I 
> attached my config, if you want to try yourself with that kernel. cfq2 
> runs w/o probs.

Well hard to say anything qualified without an oops :/

I'll try with PREEMPT here.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06  9:35   ` Jens Axboe
@ 2004-12-06 11:48     ` Ed Tomlinson
  2004-12-06 12:31     ` Prakash K. Cheemplavam
  1 sibling, 0 replies; 30+ messages in thread
From: Ed Tomlinson @ 2004-12-06 11:48 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Linux Kernel

On Monday 06 December 2004 04:35, Jens Axboe wrote:
> Well hard to say anything qualified without an oops :/
> 
> I'll try with PREEMPT here.

Jens,

Preempt was enabled here too when it hung.

Ed

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06  7:19                 ` Jens Axboe
@ 2004-12-06 12:18                   ` Helge Hafting
  2004-12-06 12:24                     ` Jens Axboe
  0 siblings, 1 reply; 30+ messages in thread
From: Helge Hafting @ 2004-12-06 12:18 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Robert Love, Kyle Moffett, Con Kolivas, Jeff Sipek, Linux Kernel

Jens Axboe wrote:

>On Mon, Dec 06 2004, Robert Love wrote:
>  
>
>>
>>	(1) separate the two values.  we have a scheduling
>>	    priority (distributing the finite resource of
>>	    processor time) and an I/O priority (distributing
>>	    the finite resource of disk bandwidth).
>>	(2) just have a single value.
>>    
>>
>
>They are inherently separate entities, I don't think mixing them up is a
>good idea. IO priorities also includes things like attempting to
>guarantee disk bandwidth, it isn't always just a 'nice' value.
>  
>
Two separate entities is fine.  Those who want just one
entity can use a "nice wrapper" that sets both
simultaneously.

Helge Hafting

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06  5:14               ` Robert Love
  2004-12-06  7:19                 ` Jens Axboe
@ 2004-12-06 12:21                 ` Kyle Moffett
  2004-12-06 16:42                   ` Robert Love
  1 sibling, 1 reply; 30+ messages in thread
From: Kyle Moffett @ 2004-12-06 12:21 UTC (permalink / raw)
  To: Robert Love; +Cc: Jeff Sipek, Linux Kernel, Jens Axboe, Con Kolivas

On Dec 06, 2004, at 00:14, Robert Love wrote:
> I think the complication of all of this demonstrates the 
> overcomplexity.
> I think we need to either
>
> 	(1) separate the two values.  we have a scheduling
> 	    priority (distributing the finite resource of
> 	    processor time) and an I/O priority (distributing
> 	    the finite resource of disk bandwidth).
> 	(2) just have a single value.
>
> Personally, I prefer (1).  But (2) is fine.
>
> What we want to do either way is cleanly separate the concepts in the
> kernel.  That way we can decide what we actually expose to user-space.

The reason I proposed my ideas for tying the two values together is 
that I am
concerned about breaking existing code.  I have several binaries to 
which the
source has been lost but I would like to have them continue to properly 
adjust
their priorities internally.  On the other hand, I have other programs 
that I am
currently writing where I would like to be able to have separate IO and 
CPU
priorities.  I believe that we could have two values yet preserve 
backwards
compatibility if we derive the effective IO priority from the sum or 
the provided
IO and CPU priority values, or something along those lines.  That way 
any
program not knowing about the new syscall could just nice() and get both
values adjusted.  If a parent program ran "ioprio()" beforehand to 
adjust the
ioprio with respect to the nice value, then that balance would be 
maintained.

Cheers,
Kyle Moffett

-----BEGIN GEEK CODE BLOCK-----
Version: 3.12
GCM/CS/IT/U d- s++: a18 C++++>$ UB/L/X/*++++(+)>$ P+++(++++)>$
L++++(+++) E W++(+) N+++(++) o? K? w--- O? M++ V? PS+() PE+(-) Y+
PGP+++ t+(+++) 5 X R? tv-(--) b++++(++) DI+ D+ G e->++++$ h!*()>++$ r  
!y?(-)
------END GEEK CODE BLOCK------



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06 12:18                   ` Helge Hafting
@ 2004-12-06 12:24                     ` Jens Axboe
  0 siblings, 0 replies; 30+ messages in thread
From: Jens Axboe @ 2004-12-06 12:24 UTC (permalink / raw)
  To: Helge Hafting
  Cc: Robert Love, Kyle Moffett, Con Kolivas, Jeff Sipek, Linux Kernel

On Mon, Dec 06 2004, Helge Hafting wrote:
> Jens Axboe wrote:
> 
> >On Mon, Dec 06 2004, Robert Love wrote:
> > 
> >
> >>
> >>	(1) separate the two values.  we have a scheduling
> >>	    priority (distributing the finite resource of
> >>	    processor time) and an I/O priority (distributing
> >>	    the finite resource of disk bandwidth).
> >>	(2) just have a single value.
> >>   
> >>
> >
> >They are inherently separate entities, I don't think mixing them up is a
> >good idea. IO priorities also includes things like attempting to
> >guarantee disk bandwidth, it isn't always just a 'nice' value.
> > 
> >
> Two separate entities is fine.  Those who want just one
> entity can use a "nice wrapper" that sets both
> simultaneously.

Did you happen to catch any info out of the crash?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06  9:35   ` Jens Axboe
  2004-12-06 11:48     ` Ed Tomlinson
@ 2004-12-06 12:31     ` Prakash K. Cheemplavam
  2004-12-06 13:27       ` [PATCH] Time sliced CFQ #3 Jens Axboe
  1 sibling, 1 reply; 30+ messages in thread
From: Prakash K. Cheemplavam @ 2004-12-06 12:31 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Linux Kernel, Andrew Morton

[-- Attachment #1: Type: text/plain, Size: 922 bytes --]

Jens Axboe schrieb:
> On Mon, Dec 06 2004, Prakash K. Cheemplavam wrote:
> 
>>Hi,
>>
>>this one crapped out on me, while having heavy disk activity. (updating 
>> gentoo portage tree - rebuilding metadata of it). Unfortunately I 
>>couldn't save the oops, as I had no hd access anymore and X would freeze 
>>a little later...(and I don't want to risk my data a second time...)
> 
> 
> Did you save anything at all? Just the function of the EIP would be
> better than nothing.

Nope, sorry. I hoped it would be in the logs, but it seems as new cfq 
went havoc, hd access went dead. And I was a bit too nervous about my 
data so that I didn't write it down by hand...

> Well hard to say anything qualified without an oops :/
> 
> I'll try with PREEMPT here.

If you are not able to reproduce, I will try it again on a spare 
partition... Should access to zip drive still be possible if hd's 
io-scheduler is dead?

Prakash


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH] Time sliced CFQ #3
  2004-12-06 12:31     ` Prakash K. Cheemplavam
@ 2004-12-06 13:27       ` Jens Axboe
  2004-12-06 14:01         ` Søren Lott
                           ` (2 more replies)
  0 siblings, 3 replies; 30+ messages in thread
From: Jens Axboe @ 2004-12-06 13:27 UTC (permalink / raw)
  To: Prakash K. Cheemplavam; +Cc: Linux Kernel, Andrew Morton, helge.hafting

On Mon, Dec 06 2004, Prakash K. Cheemplavam wrote:
> Jens Axboe schrieb:
> >On Mon, Dec 06 2004, Prakash K. Cheemplavam wrote:
> >
> >>Hi,
> >>
> >>this one crapped out on me, while having heavy disk activity. (updating 
> >>gentoo portage tree - rebuilding metadata of it). Unfortunately I 
> >>couldn't save the oops, as I had no hd access anymore and X would freeze 
> >>a little later...(and I don't want to risk my data a second time...)
> >
> >
> >Did you save anything at all? Just the function of the EIP would be
> >better than nothing.
> 
> Nope, sorry. I hoped it would be in the logs, but it seems as new cfq 
> went havoc, hd access went dead. And I was a bit too nervous about my 
> data so that I didn't write it down by hand...

It is really rare for the io scheduler to cause serious data screwups,
thankfully. Often what will happen is that it will crash, but with
everything written fine up to that point. So it's similar to a power
loss, but the drive should get its cache out on its own.

> >Well hard to say anything qualified without an oops :/
> >
> >I'll try with PREEMPT here.
> 
> If you are not able to reproduce, I will try it again on a spare 
> partition... Should access to zip drive still be possible if hd's 
> io-scheduler is dead?

Depends on where it died, really. But the chances are probably slim.

If you feel like giving it another go, I've uploaded a new patch here:

http://www.kernel.org/pub/linux/kernel/people/axboe/patches/v2.6/2.6.10-rc3/cfq-time-slices-6.gz

Changes:

- Increase async_rq slice significantly (from 8 to 128)

- Fix accounting bug that prevented non-fs requests from working
  correctly. Things like cdrecord and cdda rippers would hang.

- Add logic to check whether a given process is potentially runnable or
  not. We don't arm the slice idle timer if the process has exited or is
  not either running or about to be running.

- TCQ fix: don't idle drive until last request comes in.

- Fix a stall with exiting task holding the active queue. This should
  fix Helge's problems, I hope.

- Restore ->nr_requests on io scheduler switch

- Kill ->pid from io_context, this seems to have been added with 'as'
  but never used by anyone.


-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #3
  2004-12-06 13:27       ` [PATCH] Time sliced CFQ #3 Jens Axboe
@ 2004-12-06 14:01         ` Søren Lott
  2004-12-06 15:01           ` Jens Axboe
  2004-12-06 15:07         ` Prakash K. Cheemplavam
  2004-12-06 23:30         ` Ed Tomlinson
  2 siblings, 1 reply; 30+ messages in thread
From: Søren Lott @ 2004-12-06 14:01 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Prakash K. Cheemplavam, Linux Kernel, Andrew Morton, helge.hafting

On Mon, 6 Dec 2004 14:27:50 +0100, Jens Axboe <axboe@suse.de> wrote:
> http://www.kernel.org/pub/linux/kernel/people/axboe/patches/v2.6/2.6.10-rc3/cfq-time-slices-6.gz
> 

would it be possible to get a patch against -mm4 ?

Thanks.

-SL

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #3
  2004-12-06 14:01         ` Søren Lott
@ 2004-12-06 15:01           ` Jens Axboe
  2004-12-06 15:45             ` Jens Axboe
  0 siblings, 1 reply; 30+ messages in thread
From: Jens Axboe @ 2004-12-06 15:01 UTC (permalink / raw)
  To: Søren Lott
  Cc: Prakash K. Cheemplavam, Linux Kernel, Andrew Morton, helge.hafting

On Mon, Dec 06 2004, Søren Lott wrote:
> On Mon, 6 Dec 2004 14:27:50 +0100, Jens Axboe <axboe@suse.de> wrote:
> > http://www.kernel.org/pub/linux/kernel/people/axboe/patches/v2.6/2.6.10-rc3/cfq-time-slices-6.gz
> > 
> 
> would be possible get a patch against -mm4 ?

Sure, in fact I will move development to -mm instead.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #3
  2004-12-06 13:27       ` [PATCH] Time sliced CFQ #3 Jens Axboe
  2004-12-06 14:01         ` Søren Lott
@ 2004-12-06 15:07         ` Prakash K. Cheemplavam
  2004-12-06 23:30         ` Ed Tomlinson
  2 siblings, 0 replies; 30+ messages in thread
From: Prakash K. Cheemplavam @ 2004-12-06 15:07 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Linux Kernel, Andrew Morton, helge.hafting

[-- Attachment #1: Type: text/plain, Size: 494 bytes --]

Jens Axboe schrieb:
> Depends on where it died, really. But the chances are probably slim.
> 
> If you feel like giving it another go, I've uploaded a new patch here:


So, I was brave ;-) and tried another go with this one. Seems to work 
this time. Still the issue with sustained write rate, but this time my 
mailer is still somewhat usable while writing. It gets to a crawl, but 
it still reacts....so I'd consider this a little progress (with the help 
of a regression).

Cheers,

Prakash

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #3
  2004-12-06 15:01           ` Jens Axboe
@ 2004-12-06 15:45             ` Jens Axboe
  0 siblings, 0 replies; 30+ messages in thread
From: Jens Axboe @ 2004-12-06 15:45 UTC (permalink / raw)
  To: Søren Lott
  Cc: Prakash K. Cheemplavam, Linux Kernel, Andrew Morton, helge.hafting

On Mon, Dec 06 2004, Jens Axboe wrote:
> On Mon, Dec 06 2004, Søren Lott wrote:
> > On Mon, 6 Dec 2004 14:27:50 +0100, Jens Axboe <axboe@suse.de> wrote:
> > > http://www.kernel.org/pub/linux/kernel/people/axboe/patches/v2.6/2.6.10-rc3/cfq-time-slices-6.gz
> > > 
> > 
> > would be possible get a patch against -mm4 ?
> 
> Sure, in fact I will move development to -mm instead.

Here is one. Changes:

- Port to 2.6.10-rc2-mm4

- Change the 'is task running or about to run' to a much better sched.c
  helper from Ingo.

- Kill PF_SYNCWRITE, rely on WRITE_SYNC instead.

 drivers/block/as-iosched.c  |    3 
 drivers/block/cfq-iosched.c |  706 ++++++++++++++++++++++++++++----------------
 drivers/block/ll_rw_blk.c   |   20 -
 fs/buffer.c                 |    4 
 fs/fs-writeback.c           |    2 
 fs/mpage.c                  |   31 +
 include/linux/blkdev.h      |    5 
 include/linux/sched.h       |    4 
 kernel/sched.c              |   37 ++
 9 files changed, 525 insertions(+), 287 deletions(-)

diff -urP -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.10-rc2-mm4/drivers/block/as-iosched.c linux-2.6.10-rc2-mm4/drivers/block/as-iosched.c
--- /opt/kernel/linux-2.6.10-rc2-mm4/drivers/block/as-iosched.c	2004-12-06 16:07:50.000000000 +0100
+++ linux-2.6.10-rc2-mm4/drivers/block/as-iosched.c	2004-12-06 16:11:27.000000000 +0100
@@ -1415,8 +1415,7 @@
 	struct as_rq *alias;
 	int data_dir;
 
-	if (rq_data_dir(arq->request) == READ
-			|| current->flags&PF_SYNCWRITE)
+	if (rq_data_dir(arq->request) == READ || blk_rq_sync(arq->request))
 		arq->is_sync = 1;
 	else
 		arq->is_sync = 0;
diff -urP -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.10-rc2-mm4/drivers/block/cfq-iosched.c linux-2.6.10-rc2-mm4/drivers/block/cfq-iosched.c
--- /opt/kernel/linux-2.6.10-rc2-mm4/drivers/block/cfq-iosched.c	2004-12-06 16:07:50.000000000 +0100
+++ linux-2.6.10-rc2-mm4/drivers/block/cfq-iosched.c	2004-12-06 16:36:46.854550611 +0100
@@ -22,21 +22,24 @@
 #include <linux/rbtree.h>
 #include <linux/mempool.h>
 
-static unsigned long max_elapsed_crq;
-static unsigned long max_elapsed_dispatch;
-
 /*
  * tunables
  */
 static int cfq_quantum = 4;		/* max queue in one round of service */
 static int cfq_queued = 8;		/* minimum rq allocate limit per-queue*/
-static int cfq_service = HZ;		/* period over which service is avg */
 static int cfq_fifo_expire_r = HZ / 2;	/* fifo timeout for sync requests */
 static int cfq_fifo_expire_w = 5 * HZ;	/* fifo timeout for async requests */
 static int cfq_fifo_rate = HZ / 8;	/* fifo expiry rate */
 static int cfq_back_max = 16 * 1024;	/* maximum backwards seek, in KiB */
 static int cfq_back_penalty = 2;	/* penalty of a backwards seek */
 
+static int cfq_slice_sync = HZ / 10;
+static int cfq_slice_async = HZ / 25;
+static int cfq_slice_async_rq = 128;
+static int cfq_slice_idle = HZ / 249;
+
+static int cfq_max_depth = 4;
+
 /*
  * for the hash of cfqq inside the cfqd
  */
@@ -55,6 +58,7 @@
 #define list_entry_hash(ptr)	hlist_entry((ptr), struct cfq_rq, hash)
 
 #define list_entry_cfqq(ptr)	list_entry((ptr), struct cfq_queue, cfq_list)
+#define list_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
 
 #define RQ_DATA(rq)		(rq)->elevator_private
 
@@ -76,22 +80,18 @@
 #define rq_rb_key(rq)		(rq)->sector
 
 /*
- * threshold for switching off non-tag accounting
- */
-#define CFQ_MAX_TAG		(4)
-
-/*
  * sort key types and names
  */
 enum {
 	CFQ_KEY_PGID,
 	CFQ_KEY_TGID,
+	CFQ_KEY_PID,
 	CFQ_KEY_UID,
 	CFQ_KEY_GID,
 	CFQ_KEY_LAST,
 };
 
-static char *cfq_key_types[] = { "pgid", "tgid", "uid", "gid", NULL };
+static char *cfq_key_types[] = { "pgid", "tgid", "pid", "uid", "gid", NULL };
 
 /*
  * spare queue
@@ -103,6 +103,8 @@
 static kmem_cache_t *cfq_ioc_pool;
 
 struct cfq_data {
+	atomic_t ref;
+
 	struct list_head rr_list;
 	struct list_head empty_list;
 
@@ -114,8 +116,6 @@
 
 	unsigned int max_queued;
 
-	atomic_t ref;
-
 	int key_type;
 
 	mempool_t *crq_pool;
@@ -127,6 +127,14 @@
 	int rq_in_driver;
 
 	/*
+	 * schedule slice state info
+	 */
+	struct timer_list timer;
+	struct work_struct unplug_work;
+	struct cfq_queue *active_queue;
+	unsigned int dispatch_slice;
+
+	/*
 	 * tunables, see top of file
 	 */
 	unsigned int cfq_quantum;
@@ -137,8 +145,10 @@
 	unsigned int cfq_back_penalty;
 	unsigned int cfq_back_max;
 	unsigned int find_best_crq;
-
-	unsigned int cfq_tagged;
+	unsigned int cfq_slice[2];
+	unsigned int cfq_slice_async_rq;
+	unsigned int cfq_slice_idle;
+	unsigned int cfq_max_depth;
 };
 
 struct cfq_queue {
@@ -150,8 +160,6 @@
 	struct hlist_node cfq_hash;
 	/* hash key */
 	unsigned long key;
-	/* whether queue is on rr (or empty) list */
-	int on_rr;
 	/* on either rr or empty list of cfqd */
 	struct list_head cfq_list;
 	/* sorted list of pending requests */
@@ -169,15 +177,19 @@
 
 	int key_type;
 
-	unsigned long service_start;
-	unsigned long service_used;
-
-	unsigned int max_rate;
+	unsigned long slice_start;
+	unsigned long slice_end;
+	unsigned long service_last;
 
 	/* number of requests that have been handed to the driver */
 	int in_flight;
-	/* number of currently allocated requests */
-	int alloc_limit[2];
+
+	/* whether queue is on rr (or empty) list */
+	unsigned int on_rr : 1;
+	/* idle slice, waiting for new request submission */
+	unsigned int wait_request : 1;
+	/* idle slice, request added, now waiting to dispatch it */
+	unsigned int must_dispatch : 1;
 };
 
 struct cfq_rq {
@@ -195,7 +207,6 @@
 	unsigned int in_flight : 1;
 	unsigned int accounted : 1;
 	unsigned int is_sync   : 1;
-	unsigned int is_write  : 1;
 };
 
 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned long);
@@ -219,6 +230,8 @@
 		default:
 		case CFQ_KEY_TGID:
 			return tsk->tgid;
+		case CFQ_KEY_PID:
+			return tsk->pid;
 		case CFQ_KEY_UID:
 			return tsk->uid;
 		case CFQ_KEY_GID:
@@ -406,67 +419,22 @@
 		cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
 }
 
-static int cfq_check_sort_rr_list(struct cfq_queue *cfqq)
-{
-	struct list_head *head = &cfqq->cfqd->rr_list;
-	struct list_head *next, *prev;
-
-	/*
-	 * list might still be ordered
-	 */
-	next = cfqq->cfq_list.next;
-	if (next != head) {
-		struct cfq_queue *cnext = list_entry_cfqq(next);
-
-		if (cfqq->service_used > cnext->service_used)
-			return 1;
-	}
-
-	prev = cfqq->cfq_list.prev;
-	if (prev != head) {
-		struct cfq_queue *cprev = list_entry_cfqq(prev);
-
-		if (cfqq->service_used < cprev->service_used)
-			return 1;
-	}
-
-	return 0;
-}
-
-static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue)
+static void cfq_resort_rr_list(struct cfq_queue *cfqq)
 {
 	struct list_head *entry = &cfqq->cfqd->rr_list;
 
-	if (!cfqq->on_rr)
-		return;
-	if (!new_queue && !cfq_check_sort_rr_list(cfqq))
-		return;
-
 	list_del(&cfqq->cfq_list);
 
 	/*
-	 * sort by our mean service_used, sub-sort by in-flight requests
+	 * sort by when queue was last serviced
 	 */
 	while ((entry = entry->prev) != &cfqq->cfqd->rr_list) {
 		struct cfq_queue *__cfqq = list_entry_cfqq(entry);
 
-		if (cfqq->service_used > __cfqq->service_used)
+		if (!__cfqq->service_last)
+			break;
+		if (time_before(__cfqq->service_last, cfqq->service_last))
 			break;
-		else if (cfqq->service_used == __cfqq->service_used) {
-			struct list_head *prv;
-
-			while ((prv = entry->prev) != &cfqq->cfqd->rr_list) {
-				__cfqq = list_entry_cfqq(prv);
-
-				WARN_ON(__cfqq->service_used > cfqq->service_used);
-				if (cfqq->service_used != __cfqq->service_used)
-					break;
-				if (cfqq->in_flight > __cfqq->in_flight)
-					break;
-
-				entry = prv;
-			}
-		}
 	}
 
 	list_add(&cfqq->cfq_list, entry);
@@ -479,16 +447,12 @@
 static inline void
 cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	/*
-	 * it's currently on the empty list
-	 */
-	cfqq->on_rr = 1;
-	cfqd->busy_queues++;
+	BUG_ON(cfqq->on_rr);
 
-	if (time_after(jiffies, cfqq->service_start + cfq_service))
-		cfqq->service_used >>= 3;
+	cfqd->busy_queues++;
+	cfqq->on_rr = 1;
 
-	cfq_sort_rr_list(cfqq, 1);
+	cfq_resort_rr_list(cfqq);
 }
 
 static inline void
@@ -512,10 +476,10 @@
 		struct cfq_data *cfqd = cfqq->cfqd;
 
 		BUG_ON(!cfqq->queued[crq->is_sync]);
+		cfqq->queued[crq->is_sync]--;
 
 		cfq_update_next_crq(crq);
 
-		cfqq->queued[crq->is_sync]--;
 		rb_erase(&crq->rb_node, &cfqq->sort_list);
 		RB_CLEAR_COLOR(&crq->rb_node);
 
@@ -619,17 +583,13 @@
 {
 	struct cfq_rq *crq = RQ_DATA(rq);
 
-	if (crq) {
+	if (crq && crq->accounted) {
 		struct cfq_queue *cfqq = crq->cfq_queue;
 
-		if (cfqq->cfqd->cfq_tagged) {
-			cfqq->service_used--;
-			cfq_sort_rr_list(cfqq, 0);
-		}
-
 		crq->accounted = 0;
 		cfqq->cfqd->rq_in_driver--;
 	}
+
 	list_add(&rq->queuelist, &q->queue_head);
 }
 
@@ -640,9 +600,7 @@
 	if (crq) {
 		cfq_remove_merge_hints(q, crq);
 		list_del_init(&rq->queuelist);
-
-		if (crq->cfq_queue)
-			cfq_del_crq_rb(crq);
+		cfq_del_crq_rb(crq);
 	}
 }
 
@@ -723,6 +681,121 @@
 	cfq_remove_request(q, next);
 }
 
+static inline void cfq_set_active_queue(struct cfq_data *cfqd)
+{
+	struct cfq_queue *cfqq = NULL;
+
+	if (!list_empty(&cfqd->rr_list)) {
+		cfqq = list_entry_cfqq(cfqd->rr_list.next);
+
+		cfqq->slice_start = jiffies;
+		cfqq->slice_end = 0;
+		cfqq->wait_request = 0;
+	}
+
+	cfqd->active_queue = cfqq;
+}
+
+/*
+ * current cfqq expired its slice (or was too idle), select new one
+ */
+static inline void cfq_slice_expired(struct cfq_data *cfqd)
+{
+	struct cfq_queue *cfqq = cfqd->active_queue;
+
+	if (cfqq) {
+		if (cfqq->wait_request)
+			del_timer(&cfqd->timer);
+
+		cfqq->service_last = jiffies;
+		cfqq->must_dispatch = 0;
+		cfqq->wait_request = 0;
+
+		if (cfqq->on_rr)
+			cfq_resort_rr_list(cfqq);
+
+		cfqd->active_queue = NULL;
+	}
+
+	cfqd->dispatch_slice = 0;
+}
+
+static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+			       struct cfq_io_context *cic)
+{
+	WARN_ON(!RB_EMPTY(&cfqq->sort_list));
+
+	cfqq->wait_request = 1;
+
+	if (!cfqd->cfq_slice_idle)
+		return 0;
+
+	/*
+	 * don't arm a slice timer if we don't expect the task to run
+	 * after the slice has ended
+	 */
+	if (cic) {
+		unsigned long sched_time;
+
+		if (!cic->ioc->task)
+			return 0;
+
+		sched_time = task_will_schedule_at(cic->ioc->task);
+		if (sched_time && time_after(sched_time, cfqq->slice_end))
+			return 0;
+	}
+
+	if (!timer_pending(&cfqd->timer)) {
+		unsigned long now = jiffies, slice_left;
+
+		slice_left = cfqq->slice_end - now - 1;
+		cfqd->timer.expires = now + min(cfqd->cfq_slice_idle, (unsigned int)slice_left);
+		add_timer(&cfqd->timer);
+	}
+
+	return 1;
+}
+
+/*
+ * get next queue for service
+ */
+static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
+{
+	struct cfq_queue *cfqq = cfqd->active_queue;
+	unsigned long now = jiffies;
+
+	cfqq = cfqd->active_queue;
+	if (!cfqq)
+		goto new_queue;
+
+	if (cfqq->must_dispatch)
+		goto must_queue;
+
+	/*
+	 * slice has expired
+	 */
+	if (time_after(jiffies, cfqq->slice_end))
+		goto new_queue;
+
+	/*
+	 * if queue has requests, dispatch one. if not, check if
+	 * enough slice is left to wait for one
+	 */
+must_queue:
+	if (!RB_EMPTY(&cfqq->sort_list))
+		goto keep_queue;
+	else if (time_before(now, cfqq->slice_end)) {
+		if (cfq_arm_slice_timer(cfqd, cfqq, NULL))
+			return NULL;
+	}
+
+new_queue:
+	cfq_slice_expired(cfqd);
+	cfq_set_active_queue(cfqd);
+keep_queue:
+	return cfqd->active_queue;
+}
+
 /*
  * we dispatch cfqd->cfq_quantum requests in total from the rr_list queues,
  * this function sector sorts the selected request to minimize seeks. we start
@@ -741,9 +814,7 @@
 	list_del(&crq->request->queuelist);
 
 	last = cfqd->last_sector;
-	while ((entry = entry->prev) != head) {
-		__rq = list_entry_rq(entry);
-
+	list_for_each_entry_reverse(__rq, head, queuelist) {
 		if (blk_barrier_rq(crq->request))
 			break;
 		if (!blk_fs_request(crq->request))
@@ -777,95 +848,104 @@
 	if (time_before(now, cfqq->last_fifo_expire + cfqd->cfq_fifo_batch_expire))
 		return NULL;
 
-	crq = RQ_DATA(list_entry(cfqq->fifo[0].next, struct request, queuelist));
-	if (reads && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) {
-		cfqq->last_fifo_expire = now;
-		return crq;
+	if (reads) {
+		crq = RQ_DATA(list_entry_fifo(cfqq->fifo[READ].next));
+		if (time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) {
+			cfqq->last_fifo_expire = now;
+			return crq;
+		}
 	}
 
-	crq = RQ_DATA(list_entry(cfqq->fifo[1].next, struct request, queuelist));
-	if (writes && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) {
-		cfqq->last_fifo_expire = now;
-		return crq;
+	if (writes) {
+		crq = RQ_DATA(list_entry_fifo(cfqq->fifo[WRITE].next));
+		if (time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) {
+			cfqq->last_fifo_expire = now;
+			return crq;
+		}
 	}
 
 	return NULL;
 }
 
-/*
- * dispatch a single request from given queue
- */
-static inline void
-cfq_dispatch_request(request_queue_t *q, struct cfq_data *cfqd,
-		     struct cfq_queue *cfqq)
+static int
+__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+			int max_dispatch)
 {
-	struct cfq_rq *crq;
+	int dispatched = 0, sync = 0;
+
+	BUG_ON(RB_EMPTY(&cfqq->sort_list));
+
+	do {
+		struct cfq_rq *crq;
+
+		/*
+		 * follow expired path, else get first next available
+		 */
+		if ((crq = cfq_check_fifo(cfqq)) == NULL) {
+			if (cfqd->find_best_crq)
+				crq = cfqq->next_crq;
+			else
+				crq = rb_entry_crq(rb_first(&cfqq->sort_list));
+		}
+
+		cfqd->last_sector = crq->request->sector + crq->request->nr_sectors;
+
+		/*
+		 * finally, insert request into driver list
+		 */
+		cfq_dispatch_sort(cfqd->queue, crq);
+
+		cfqd->dispatch_slice++;
+		dispatched++;
+		sync += crq->is_sync;
+
+		if (RB_EMPTY(&cfqq->sort_list))
+			break;
+
+	} while (dispatched < max_dispatch);
 
 	/*
-	 * follow expired path, else get first next available
+	 * if slice end isn't set yet, set it. if at least one request was
+	 * sync, use the sync time slice value
 	 */
-	if ((crq = cfq_check_fifo(cfqq)) == NULL) {
-		if (cfqd->find_best_crq)
-			crq = cfqq->next_crq;
-		else
-			crq = rb_entry_crq(rb_first(&cfqq->sort_list));
-	}
-
-	cfqd->last_sector = crq->request->sector + crq->request->nr_sectors;
+	if (!cfqq->slice_end)
+		cfqq->slice_end = cfqd->cfq_slice[!!sync] + jiffies;
 
 	/*
-	 * finally, insert request into driver list
+	 * expire an async queue immediately if it has used up its slice
 	 */
-	cfq_dispatch_sort(q, crq);
+	if (!sync && cfqd->dispatch_slice >= cfqd->cfq_slice_async_rq)
+		cfq_slice_expired(cfqd);
+
+	return dispatched;
 }
 
 static int cfq_dispatch_requests(request_queue_t *q, int max_dispatch)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq;
-	struct list_head *entry, *tmp;
-	int queued, busy_queues, first_round;
 
 	if (list_empty(&cfqd->rr_list))
 		return 0;
 
-	queued = 0;
-	first_round = 1;
-restart:
-	busy_queues = 0;
-	list_for_each_safe(entry, tmp, &cfqd->rr_list) {
-		cfqq = list_entry_cfqq(entry);
-
-		BUG_ON(RB_EMPTY(&cfqq->sort_list));
-
-		/*
-		 * first round of queueing, only select from queues that
-		 * don't already have io in-flight
-		 */
-		if (first_round && cfqq->in_flight)
-			continue;
-
-		cfq_dispatch_request(q, cfqd, cfqq);
-
-		if (!RB_EMPTY(&cfqq->sort_list))
-			busy_queues++;
-
-		queued++;
-	}
-
-	if ((queued < max_dispatch) && (busy_queues || first_round)) {
-		first_round = 0;
-		goto restart;
+	cfqq = cfq_select_queue(cfqd);
+	if (cfqq) {
+		cfqq->wait_request = 0;
+		cfqq->must_dispatch = 0;
+		del_timer(&cfqd->timer);
+		return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
 	}
 
-	return queued;
+	return 0;
 }
 
 static inline void cfq_account_dispatch(struct cfq_rq *crq)
 {
 	struct cfq_queue *cfqq = crq->cfq_queue;
 	struct cfq_data *cfqd = cfqq->cfqd;
-	unsigned long now, elapsed;
+
+	if (unlikely(!blk_fs_request(crq->request)))
+		return;
 
 	/*
 	 * accounted bit is necessary since some drivers will call
@@ -874,37 +954,9 @@
 	if (crq->accounted)
 		return;
 
-	now = jiffies;
-	if (cfqq->service_start == ~0UL)
-		cfqq->service_start = now;
-
-	/*
-	 * on drives with tagged command queueing, command turn-around time
-	 * doesn't necessarily reflect the time spent processing this very
-	 * command inside the drive. so do the accounting differently there,
-	 * by just sorting on the number of requests
-	 */
-	if (cfqd->cfq_tagged) {
-		if (time_after(now, cfqq->service_start + cfq_service)) {
-			cfqq->service_start = now;
-			cfqq->service_used /= 10;
-		}
-
-		cfqq->service_used++;
-		cfq_sort_rr_list(cfqq, 0);
-	}
-
-	elapsed = now - crq->queue_start;
-	if (elapsed > max_elapsed_dispatch)
-		max_elapsed_dispatch = elapsed;
-
 	crq->accounted = 1;
-	crq->service_start = now;
-
-	if (++cfqd->rq_in_driver >= CFQ_MAX_TAG && !cfqd->cfq_tagged) {
-		cfqq->cfqd->cfq_tagged = 1;
-		printk("cfq: depth %d reached, tagging now on\n", CFQ_MAX_TAG);
-	}
+	crq->service_start = jiffies;
+	cfqd->rq_in_driver++;
 }
 
 static inline void
@@ -912,24 +964,24 @@
 {
 	struct cfq_data *cfqd = cfqq->cfqd;
 
-	WARN_ON(!cfqd->rq_in_driver);
-	cfqd->rq_in_driver--;
-
-	if (!cfqd->cfq_tagged) {
-		unsigned long now = jiffies;
-		unsigned long duration = now - crq->service_start;
-
-		if (time_after(now, cfqq->service_start + cfq_service)) {
-			cfqq->service_start = now;
-			cfqq->service_used >>= 3;
-		}
+	if (crq->accounted) {
+		WARN_ON(!cfqd->rq_in_driver);
+		cfqd->rq_in_driver--;
+	}
 
-		cfqq->service_used += duration;
-		cfq_sort_rr_list(cfqq, 0);
+	/*
+	 * queue was preempted while this request was servicing
+	 */
+	if (cfqd->active_queue != cfqq)
+		return;
 
-		if (duration > max_elapsed_crq)
-			max_elapsed_crq = duration;
-	}
+	/*
+	 * this is still the active queue. if we have nothing to do and no
+	 * more pending requests in flight, wait for a new sync request if
+	 * this request was sync itself
+	 */
+	if (RB_EMPTY(&cfqq->sort_list) && crq->is_sync && !cfqq->in_flight)
+		cfq_arm_slice_timer(cfqd, cfqq, crq->io_context);
 }
 
 static struct request *cfq_next_request(request_queue_t *q)
@@ -937,6 +989,9 @@
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct request *rq;
 
+	if (cfqd->rq_in_driver >= cfqd->cfq_max_depth)
+		return NULL;
+
 	if (!list_empty(&q->queue_head)) {
 		struct cfq_rq *crq;
 dispatch:
@@ -964,6 +1019,8 @@
  */
 static void cfq_put_queue(struct cfq_queue *cfqq)
 {
+	struct cfq_data *cfqd = cfqq->cfqd;
+
 	BUG_ON(!atomic_read(&cfqq->ref));
 
 	if (!atomic_dec_and_test(&cfqq->ref))
@@ -972,6 +1029,9 @@
 	BUG_ON(rb_first(&cfqq->sort_list));
 	BUG_ON(cfqq->on_rr);
 
+	if (unlikely(cfqd->active_queue == cfqq))
+		cfqd->active_queue = NULL;
+
 	cfq_put_cfqd(cfqq->cfqd);
 
 	/*
@@ -1033,6 +1093,18 @@
 	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
 }
 
+static void cfq_queue_remove(struct cfq_queue *cfqq)
+{
+	struct cfq_data *cfqd = cfqq->cfqd;
+
+	/*
+	 * don't waste time waiting for the timer to expire for
+	 * a dead process
+	 */
+	if (cfqq == cfqd->active_queue && del_timer(&cfqd->timer))
+		kblockd_schedule_work(&cfqd->unplug_work);
+}
+
 static void cfq_free_io_context(struct cfq_io_context *cic)
 {
 	kmem_cache_free(cfq_ioc_pool, cic);
@@ -1044,6 +1116,7 @@
 static void cfq_exit_io_context(struct cfq_io_context *cic)
 {
 	struct cfq_queue *cfqq = cic->cfqq;
+	struct cfq_data *cfqd = cfqq->cfqd;
 	struct list_head *entry = &cic->list;
 	request_queue_t *q;
 	unsigned long flags;
@@ -1060,12 +1133,14 @@
 
 		q = __cic->cfqq->cfqd->queue;
 		spin_lock(q->queue_lock);
+		cfq_queue_remove(__cic->cfqq);
 		cfq_put_queue(__cic->cfqq);
 		spin_unlock(q->queue_lock);
 	}
 
-	q = cfqq->cfqd->queue;
+	q = cfqd->queue;
 	spin_lock(q->queue_lock);
+	cfq_queue_remove(cfqq);
 	cfq_put_queue(cfqq);
 	spin_unlock(q->queue_lock);
 
@@ -1117,6 +1192,7 @@
 		cic->ioc = ioc;
 		cic->cfqq = __cfqq;
 		atomic_inc(&__cfqq->ref);
+		atomic_inc(&cfqd->ref);
 	} else {
 		struct cfq_io_context *__cic;
 		unsigned long flags;
@@ -1159,10 +1235,10 @@
 		__cic->ioc = ioc;
 		__cic->cfqq = __cfqq;
 		atomic_inc(&__cfqq->ref);
+		atomic_inc(&cfqd->ref);
 		spin_lock_irqsave(&ioc->lock, flags);
 		list_add(&__cic->list, &cic->list);
 		spin_unlock_irqrestore(&ioc->lock, flags);
-
 		cic = __cic;
 		*cfqq = __cfqq;
 	}
@@ -1199,8 +1275,11 @@
 			new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
 			spin_lock_irq(cfqd->queue->queue_lock);
 			goto retry;
-		} else
-			goto out;
+		} else {
+			cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
+			if (!cfqq)
+				goto out;
+		}
 
 		memset(cfqq, 0, sizeof(*cfqq));
 
@@ -1216,7 +1295,7 @@
 		cfqq->cfqd = cfqd;
 		atomic_inc(&cfqd->ref);
 		cfqq->key_type = cfqd->key_type;
-		cfqq->service_start = ~0UL;
+		cfqq->service_last = 0;
 	}
 
 	if (new_cfqq)
@@ -1243,14 +1322,31 @@
 
 static void cfq_enqueue(struct cfq_data *cfqd, struct cfq_rq *crq)
 {
-	crq->is_sync = 0;
-	if (rq_data_dir(crq->request) == READ || current->flags & PF_SYNCWRITE)
-		crq->is_sync = 1;
+	struct cfq_queue *cfqq = crq->cfq_queue;
+	struct request *rq = crq->request;
+
+	crq->is_sync = rq_data_dir(rq) == READ || blk_rq_sync(rq);
 
 	cfq_add_crq_rb(crq);
 	crq->queue_start = jiffies;
 
-	list_add_tail(&crq->request->queuelist, &crq->cfq_queue->fifo[crq->is_sync]);
+	list_add_tail(&rq->queuelist, &cfqq->fifo[crq->is_sync]);
+
+	/*
+	 * if we are waiting for a request for this queue, let it rip
+	 * immediately and flag that we must not expire this queue just now
+	 */
+	if (cfqq->wait_request && cfqq == cfqd->active_queue) {
+		request_queue_t *q = cfqd->queue;
+
+		cfqq->must_dispatch = 1;
+		del_timer(&cfqd->timer);
+
+		if (!blk_queue_plugged(q))
+			q->request_fn(q);
+		else
+			__generic_unplug_device(q);
+	}
 }
 
 static void
@@ -1339,32 +1435,31 @@
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq;
 	int ret = ELV_MQUEUE_MAY;
+	int limit;
 
 	if (current->flags & PF_MEMALLOC)
 		return ELV_MQUEUE_MAY;
 
 	cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(cfqd, current));
-	if (cfqq) {
-		int limit = cfqd->max_queued;
-
-		if (cfqq->allocated[rw] < cfqd->cfq_queued)
-			return ELV_MQUEUE_MUST;
-
-		if (cfqd->busy_queues)
-			limit = q->nr_requests / cfqd->busy_queues;
+	if (unlikely(!cfqq))
+		return ELV_MQUEUE_MAY;
 
-		if (limit < cfqd->cfq_queued)
-			limit = cfqd->cfq_queued;
-		else if (limit > cfqd->max_queued)
-			limit = cfqd->max_queued;
+	if (cfqq->allocated[rw] < cfqd->cfq_queued)
+		return ELV_MQUEUE_MUST;
+	if (cfqq->wait_request)
+		return ELV_MQUEUE_MUST;
+
+	limit = cfqd->max_queued;
+	if (cfqd->busy_queues)
+		limit = q->nr_requests / cfqd->busy_queues;
+
+	if (limit < cfqd->cfq_queued)
+		limit = cfqd->cfq_queued;
+	else if (limit > cfqd->max_queued)
+		limit = cfqd->max_queued;
 
-		if (cfqq->allocated[rw] >= limit) {
-			if (limit > cfqq->alloc_limit[rw])
-				cfqq->alloc_limit[rw] = limit;
-
-			ret = ELV_MQUEUE_NO;
-		}
-	}
+	if (cfqq->allocated[rw] >= limit)
+		ret = ELV_MQUEUE_NO;
 
 	return ret;
 }
@@ -1372,12 +1467,13 @@
 static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq)
 {
 	struct request_list *rl = &q->rq;
-	const int write = waitqueue_active(&rl->wait[WRITE]);
-	const int read = waitqueue_active(&rl->wait[READ]);
+	const int writes = waitqueue_active(&rl->wait[WRITE]);
+	const int reads = waitqueue_active(&rl->wait[READ]);
+	struct cfq_data *cfqd = q->elevator->elevator_data;
 
-	if (read && cfqq->allocated[READ] < cfqq->alloc_limit[READ])
+	if (reads && cfqq->allocated[READ] < cfqd->max_queued)
 		wake_up(&rl->wait[READ]);
-	if (write && cfqq->allocated[WRITE] < cfqq->alloc_limit[WRITE])
+	if (writes && cfqq->allocated[WRITE] < cfqd->max_queued)
 		wake_up(&rl->wait[WRITE]);
 }
 
@@ -1391,16 +1487,17 @@
 
 	if (crq) {
 		struct cfq_queue *cfqq = crq->cfq_queue;
+		const int rw = rq_data_dir(rq);
 
 		BUG_ON(q->last_merge == rq);
 		BUG_ON(!hlist_unhashed(&crq->hash));
 
+		BUG_ON(!cfqq->allocated[rw]);
+		cfqq->allocated[rw]--;
+
 		if (crq->io_context)
 			put_io_context(crq->io_context->ioc);
 
-		BUG_ON(!cfqq->allocated[crq->is_write]);
-		cfqq->allocated[crq->is_write]--;
-
 		mempool_free(crq, cfqd->crq_pool);
 		rq->elevator_private = NULL;
 
@@ -1470,9 +1567,7 @@
 		crq->io_context = cic;
 		crq->service_start = crq->queue_start = 0;
 		crq->in_flight = crq->accounted = crq->is_sync = 0;
-		crq->is_write = rw;
 		rq->elevator_private = crq;
-		cfqq->alloc_limit[rw] = 0;
 		return 0;
 	}
 
@@ -1486,6 +1581,44 @@
 	return 1;
 }
 
+static void cfq_kick_queue(void *data)
+{
+	request_queue_t *q = data;
+
+	blk_run_queue(q);
+}
+
+static void cfq_schedule_timer(unsigned long data)
+{
+	struct cfq_data *cfqd = (struct cfq_data *) data;
+	struct cfq_queue *cfqq;
+	unsigned long flags;
+
+	spin_lock_irqsave(cfqd->queue->queue_lock, flags);
+
+	if ((cfqq = cfqd->active_queue) != NULL) {
+		/*
+		 * expired
+		 */
+		if (time_after(jiffies, cfqq->slice_end))
+			goto out;
+
+		/*
+		 * not expired and it has a request pending, let it dispatch
+		 */
+		if (!RB_EMPTY(&cfqq->sort_list)) {
+			cfqq->must_dispatch = 1;
+			goto out_cont;
+		}
+	}
+
+out:
+	cfq_slice_expired(cfqd);
+out_cont:
+	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+	kblockd_schedule_work(&cfqd->unplug_work);
+}
+
 static void cfq_put_cfqd(struct cfq_data *cfqd)
 {
 	request_queue_t *q = cfqd->queue;
@@ -1494,6 +1627,8 @@
 	if (!atomic_dec_and_test(&cfqd->ref))
 		return;
 
+	blk_sync_queue(q);
+
 	/*
 	 * kill spare queue, getting it means we have two refences to it.
 	 * drop both
@@ -1502,6 +1637,11 @@
 	cfqq = __cfq_get_queue(cfqd, CFQ_KEY_SPARE, GFP_ATOMIC);
 	cfq_put_queue(cfqq);
 	cfq_put_queue(cfqq);
+
+	/*
+	 * restore ->nr_requests
+	 */
+	q->nr_requests = BLKDEV_MAX_RQ;
 	spin_unlock_irq(q->queue_lock);
 
 	blk_put_queue(q);
@@ -1565,10 +1705,17 @@
 	 * some requests. fairness is handled differently
 	 */
 	q->nr_requests = 1024;
-	cfqd->max_queued = q->nr_requests / 16;
+	cfqd->max_queued = q->nr_requests / 8;
 	q->nr_batching = cfq_queued;
-	cfqd->key_type = CFQ_KEY_TGID;
+	cfqd->key_type = CFQ_KEY_PID;
 	cfqd->find_best_crq = 1;
+
+	init_timer(&cfqd->timer);
+	cfqd->timer.function = cfq_schedule_timer;
+	cfqd->timer.data = (unsigned long) cfqd;
+
+	INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);
+
 	atomic_set(&cfqd->ref, 1);
 
 	cfqd->cfq_queued = cfq_queued;
@@ -1578,6 +1725,11 @@
 	cfqd->cfq_fifo_batch_expire = cfq_fifo_rate;
 	cfqd->cfq_back_max = cfq_back_max;
 	cfqd->cfq_back_penalty = cfq_back_penalty;
+	cfqd->cfq_slice[0] = cfq_slice_async;
+	cfqd->cfq_slice[1] = cfq_slice_sync;
+	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
+	cfqd->cfq_slice_idle = cfq_slice_idle;
+	cfqd->cfq_max_depth = cfq_max_depth;
 
 	return 0;
 out_spare:
@@ -1624,7 +1776,6 @@
 	return -ENOMEM;
 }
 
-
 /*
  * sysfs parts below -->
  */
@@ -1650,13 +1801,6 @@
 }
 
 static ssize_t
-cfq_clear_elapsed(struct cfq_data *cfqd, const char *page, size_t count)
-{
-	max_elapsed_dispatch = max_elapsed_crq = 0;
-	return count;
-}
-
-static ssize_t
 cfq_set_key_type(struct cfq_data *cfqd, const char *page, size_t count)
 {
 	spin_lock_irq(cfqd->queue->queue_lock);
@@ -1664,6 +1808,8 @@
 		cfqd->key_type = CFQ_KEY_PGID;
 	else if (!strncmp(page, "tgid", 4))
 		cfqd->key_type = CFQ_KEY_TGID;
+	else if (!strncmp(page, "pid", 3))
+		cfqd->key_type = CFQ_KEY_PID;
 	else if (!strncmp(page, "uid", 3))
 		cfqd->key_type = CFQ_KEY_UID;
 	else if (!strncmp(page, "gid", 3))
@@ -1704,6 +1850,11 @@
 SHOW_FUNCTION(cfq_find_best_show, cfqd->find_best_crq, 0);
 SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0);
 SHOW_FUNCTION(cfq_back_penalty_show, cfqd->cfq_back_penalty, 0);
+SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
+SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
+SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
+SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
+SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
@@ -1729,6 +1880,11 @@
 STORE_FUNCTION(cfq_find_best_store, &cfqd->find_best_crq, 0, 1, 0);
 STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
 STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0);
+STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
+STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
+STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
+STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0);
+STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 2, UINT_MAX, 0);
 #undef STORE_FUNCTION
 
 static struct cfq_fs_entry cfq_quantum_entry = {
@@ -1771,15 +1927,36 @@
 	.show = cfq_back_penalty_show,
 	.store = cfq_back_penalty_store,
 };
-static struct cfq_fs_entry cfq_clear_elapsed_entry = {
-	.attr = {.name = "clear_elapsed", .mode = S_IWUSR },
-	.store = cfq_clear_elapsed,
+static struct cfq_fs_entry cfq_slice_sync_entry = {
+	.attr = {.name = "slice_sync", .mode = S_IRUGO | S_IWUSR },
+	.show = cfq_slice_sync_show,
+	.store = cfq_slice_sync_store,
+};
+static struct cfq_fs_entry cfq_slice_async_entry = {
+	.attr = {.name = "slice_async", .mode = S_IRUGO | S_IWUSR },
+	.show = cfq_slice_async_show,
+	.store = cfq_slice_async_store,
+};
+static struct cfq_fs_entry cfq_slice_async_rq_entry = {
+	.attr = {.name = "slice_async_rq", .mode = S_IRUGO | S_IWUSR },
+	.show = cfq_slice_async_rq_show,
+	.store = cfq_slice_async_rq_store,
+};
+static struct cfq_fs_entry cfq_slice_idle_entry = {
+	.attr = {.name = "slice_idle", .mode = S_IRUGO | S_IWUSR },
+	.show = cfq_slice_idle_show,
+	.store = cfq_slice_idle_store,
 };
 static struct cfq_fs_entry cfq_key_type_entry = {
 	.attr = {.name = "key_type", .mode = S_IRUGO | S_IWUSR },
 	.show = cfq_read_key_type,
 	.store = cfq_set_key_type,
 };
+static struct cfq_fs_entry cfq_max_depth_entry = {
+	.attr = {.name = "max_depth", .mode = S_IRUGO | S_IWUSR },
+	.show = cfq_max_depth_show,
+	.store = cfq_max_depth_store,
+};
 
 static struct attribute *default_attrs[] = {
 	&cfq_quantum_entry.attr,
@@ -1791,7 +1968,11 @@
 	&cfq_find_best_entry.attr,
 	&cfq_back_max_entry.attr,
 	&cfq_back_penalty_entry.attr,
-	&cfq_clear_elapsed_entry.attr,
+	&cfq_slice_sync_entry.attr,
+	&cfq_slice_async_entry.attr,
+	&cfq_slice_async_rq_entry.attr,
+	&cfq_slice_idle_entry.attr,
+	&cfq_max_depth_entry.attr,
 	NULL,
 };
 
@@ -1856,7 +2037,7 @@
 	.elevator_owner =	THIS_MODULE,
 };
 
-int cfq_init(void)
+static int __init cfq_init(void)
 {
 	int ret;
 
@@ -1864,17 +2045,34 @@
 		return -ENOMEM;
 
 	ret = elv_register(&iosched_cfq);
-	if (!ret) {
-		__module_get(THIS_MODULE);
-		return 0;
-	}
+	if (ret)
+		cfq_slab_kill();
 
-	cfq_slab_kill();
 	return ret;
 }
 
 static void __exit cfq_exit(void)
 {
+	struct task_struct *g, *p;
+	unsigned long flags;
+
+	read_lock_irqsave(&tasklist_lock, flags);
+
+	/*
+	 * iterate each process in the system, removing our io_context
+	 */
+	do_each_thread(g, p) {
+		struct io_context *ioc = p->io_context;
+
+		if (ioc && ioc->cic) {
+			ioc->cic->exit(ioc->cic);
+			cfq_free_io_context(ioc->cic);
+			ioc->cic = NULL;
+		}
+	} while_each_thread(g, p);
+
+	read_unlock_irqrestore(&tasklist_lock, flags);
+
 	cfq_slab_kill();
 	elv_unregister(&iosched_cfq);
 }
diff -urP -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.10-rc2-mm4/drivers/block/ll_rw_blk.c linux-2.6.10-rc2-mm4/drivers/block/ll_rw_blk.c
--- /opt/kernel/linux-2.6.10-rc2-mm4/drivers/block/ll_rw_blk.c	2004-12-06 16:07:50.000000000 +0100
+++ linux-2.6.10-rc2-mm4/drivers/block/ll_rw_blk.c	2004-12-06 16:11:27.000000000 +0100
@@ -1257,11 +1257,7 @@
 	if (!blk_remove_plug(q))
 		return;
 
-	/*
-	 * was plugged, fire request_fn if queue has stuff to do
-	 */
-	if (elv_next_request(q))
-		q->request_fn(q);
+	q->request_fn(q);
 }
 EXPORT_SYMBOL(__generic_unplug_device);
 
@@ -2153,7 +2149,6 @@
 		return;
 
 	req->rq_status = RQ_INACTIVE;
-	req->q = NULL;
 	req->rl = NULL;
 
 	/*
@@ -2447,6 +2442,9 @@
 	if (barrier)
 		req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 
+	if (bio_sync(bio))
+		req->flags |= REQ_SYNCHRONOUS;
+
 	req->errors = 0;
 	req->hard_sector = req->sector = sector;
 	req->hard_nr_sectors = req->nr_sectors = nr_sectors;
@@ -2503,6 +2501,7 @@
 {
 	struct request_list *rl = &q->rq;
 	struct request *rq;
+	int requeued = 0;
 
 	spin_lock_irq(q->queue_lock);
 	clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
@@ -2511,9 +2510,13 @@
 		rq = list_entry_rq(q->drain_list.next);
 
 		list_del_init(&rq->queuelist);
-		__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
+		elv_requeue_request(q, rq);
+		requeued++;
 	}
 
+	if (requeued)
+		q->request_fn(q);
+
 	spin_unlock_irq(q->queue_lock);
 
 	wake_up(&rl->wait[0]);
@@ -3070,6 +3073,7 @@
 	local_irq_save(flags);
 	ioc = current->io_context;
 	current->io_context = NULL;
+	ioc->task = NULL;
 	local_irq_restore(flags);
 
 	if (ioc->aic && ioc->aic->exit)
@@ -3104,7 +3108,7 @@
 	ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
 	if (ret) {
 		atomic_set(&ret->refcount, 1);
-		ret->pid = tsk->pid;
+		ret->task = current;
 		ret->last_waited = jiffies; /* doesn't matter... */
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		ret->aic = NULL;
diff -urP -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.10-rc2-mm4/fs/buffer.c linux-2.6.10-rc2-mm4/fs/buffer.c
--- /opt/kernel/linux-2.6.10-rc2-mm4/fs/buffer.c	2004-12-06 16:08:06.000000000 +0100
+++ linux-2.6.10-rc2-mm4/fs/buffer.c	2004-12-06 16:11:16.000000000 +0100
@@ -347,7 +347,6 @@
 		goto out_putf;
 	}
 
-	current->flags |= PF_SYNCWRITE;
 	ret = filemap_fdatawrite(mapping);
 
 	/*
@@ -362,7 +361,6 @@
 	err = filemap_fdatawait(mapping);
 	if (!ret)
 		ret = err;
-	current->flags &= ~PF_SYNCWRITE;
 
 out_putf:
 	fput(file);
@@ -387,7 +385,6 @@
 
 	mapping = file->f_mapping;
 
-	current->flags |= PF_SYNCWRITE;
 	ret = filemap_fdatawrite(mapping);
 	down(&mapping->host->i_sem);
 	err = file->f_op->fsync(file, file->f_dentry, 1);
@@ -397,7 +394,6 @@
 	err = filemap_fdatawait(mapping);
 	if (!ret)
 		ret = err;
-	current->flags &= ~PF_SYNCWRITE;
 
 out_putf:
 	fput(file);
diff -urP -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.10-rc2-mm4/fs/fs-writeback.c linux-2.6.10-rc2-mm4/fs/fs-writeback.c
--- /opt/kernel/linux-2.6.10-rc2-mm4/fs/fs-writeback.c	2004-12-06 16:08:06.000000000 +0100
+++ linux-2.6.10-rc2-mm4/fs/fs-writeback.c	2004-12-06 16:11:27.000000000 +0100
@@ -630,7 +630,6 @@
 	int need_write_inode_now = 0;
 	int err2;
 
-	current->flags |= PF_SYNCWRITE;
 	if (what & OSYNC_DATA)
 		err = filemap_fdatawrite(mapping);
 	if (what & (OSYNC_METADATA|OSYNC_DATA)) {
@@ -643,7 +642,6 @@
 		if (!err)
 			err = err2;
 	}
-	current->flags &= ~PF_SYNCWRITE;
 
 	spin_lock(&inode_lock);
 	if ((inode->i_state & I_DIRTY) &&
diff -urP -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.10-rc2-mm4/fs/mpage.c linux-2.6.10-rc2-mm4/fs/mpage.c
--- /opt/kernel/linux-2.6.10-rc2-mm4/fs/mpage.c	2004-12-06 16:08:06.000000000 +0100
+++ linux-2.6.10-rc2-mm4/fs/mpage.c	2004-12-06 16:11:27.000000000 +0100
@@ -87,11 +87,16 @@
 	return 0;
 }
 
-struct bio *mpage_bio_submit(int rw, struct bio *bio)
+struct bio *
+mpage_bio_submit(int rw, struct bio *bio, struct writeback_control *wbc)
 {
 	bio->bi_end_io = mpage_end_io_read;
-	if (rw == WRITE)
+	if (rw == WRITE) {
+		if (wbc->sync_mode == WB_SYNC_ALL)
+			rw = WRITE_SYNC;
+
 		bio->bi_end_io = mpage_end_io_write;
+	}
 	submit_bio(rw, bio);
 	return NULL;
 }
@@ -285,7 +290,7 @@
 	 * This page will go to BIO.  Do we need to send this BIO off first?
 	 */
 	if (bio && (*last_block_in_bio != blocks[0] - 1))
-		bio = mpage_bio_submit(READ, bio);
+		bio = mpage_bio_submit(READ, bio, NULL);
 
 alloc_new:
 	if (bio == NULL) {
@@ -298,12 +303,12 @@
 
 	length = first_hole << blkbits;
 	if (bio_add_page(bio, page, length, 0) < length) {
-		bio = mpage_bio_submit(READ, bio);
+		bio = mpage_bio_submit(READ, bio, NULL);
 		goto alloc_new;
 	}
 
 	if (buffer_boundary(&bh) || (first_hole != blocks_per_page))
-		bio = mpage_bio_submit(READ, bio);
+		bio = mpage_bio_submit(READ, bio, NULL);
 	else
 		*last_block_in_bio = blocks[blocks_per_page - 1];
 out:
@@ -311,7 +316,7 @@
 
 confused:
 	if (bio)
-		bio = mpage_bio_submit(READ, bio);
+		bio = mpage_bio_submit(READ, bio, NULL);
 	if (!PageUptodate(page))
 	        block_read_full_page(page, get_block);
 	else
@@ -348,7 +353,7 @@
 	pagevec_lru_add(&lru_pvec);
 	BUG_ON(!list_empty(pages));
 	if (bio)
-		mpage_bio_submit(READ, bio);
+		mpage_bio_submit(READ, bio, NULL);
 	return 0;
 }
 EXPORT_SYMBOL(mpage_readpages);
@@ -364,7 +369,7 @@
 	bio = do_mpage_readpage(bio, page, 1,
 			&last_block_in_bio, get_block);
 	if (bio)
-		mpage_bio_submit(READ, bio);
+		mpage_bio_submit(READ, bio, NULL);
 	return 0;
 }
 EXPORT_SYMBOL(mpage_readpage);
@@ -517,7 +522,7 @@
 	 * This page will go to BIO.  Do we need to send this BIO off first?
 	 */
 	if (bio && *last_block_in_bio != blocks[0] - 1)
-		bio = mpage_bio_submit(WRITE, bio);
+		bio = mpage_bio_submit(WRITE, bio, wbc);
 
 alloc_new:
 	if (bio == NULL) {
@@ -534,7 +539,7 @@
 	 */
 	length = first_unmapped << blkbits;
 	if (bio_add_page(bio, page, length, 0) < length) {
-		bio = mpage_bio_submit(WRITE, bio);
+		bio = mpage_bio_submit(WRITE, bio, wbc);
 		goto alloc_new;
 	}
 
@@ -567,7 +572,7 @@
 	set_page_writeback(page);
 	unlock_page(page);
 	if (boundary || (first_unmapped != blocks_per_page)) {
-		bio = mpage_bio_submit(WRITE, bio);
+		bio = mpage_bio_submit(WRITE, bio, wbc);
 		if (boundary_block) {
 			write_boundary_block(boundary_bdev,
 					boundary_block, 1 << blkbits);
@@ -579,7 +584,7 @@
 
 confused:
 	if (bio)
-		bio = mpage_bio_submit(WRITE, bio);
+		bio = mpage_bio_submit(WRITE, bio, wbc);
 	*ret = page->mapping->a_ops->writepage(page, wbc);
 	/*
 	 * The caller has a ref on the inode, so *mapping is stable
@@ -731,7 +736,7 @@
 	if (!is_range)
 		mapping->writeback_index = index;
 	if (bio)
-		mpage_bio_submit(WRITE, bio);
+		mpage_bio_submit(WRITE, bio, wbc);
 	return ret;
 }
 EXPORT_SYMBOL(mpage_writepages);
diff -urP -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.10-rc2-mm4/include/linux/blkdev.h linux-2.6.10-rc2-mm4/include/linux/blkdev.h
--- /opt/kernel/linux-2.6.10-rc2-mm4/include/linux/blkdev.h	2004-12-06 16:07:55.000000000 +0100
+++ linux-2.6.10-rc2-mm4/include/linux/blkdev.h	2004-12-06 16:11:27.000000000 +0100
@@ -73,7 +73,7 @@
  */
 struct io_context {
 	atomic_t refcount;
-	pid_t pid;
+	struct task_struct *task;
 
 	/*
 	 * For request batching
@@ -209,6 +209,7 @@
 	__REQ_PM_SHUTDOWN,	/* shutdown request */
 	__REQ_BAR_PREFLUSH,	/* barrier pre-flush done */
 	__REQ_BAR_POSTFLUSH,	/* barrier post-flush */
+	__REQ_SYNCHRONOUS,	/* sync request */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -236,6 +237,7 @@
 #define REQ_PM_SHUTDOWN	(1 << __REQ_PM_SHUTDOWN)
 #define REQ_BAR_PREFLUSH	(1 << __REQ_BAR_PREFLUSH)
 #define REQ_BAR_POSTFLUSH	(1 << __REQ_BAR_POSTFLUSH)
+#define REQ_SYNCHRONOUS	(1 << __REQ_SYNCHRONOUS)
 
 /*
  * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME
@@ -406,6 +408,7 @@
 #define blk_pc_request(rq)	((rq)->flags & REQ_BLOCK_PC)
 #define blk_noretry_request(rq)	((rq)->flags & REQ_FAILFAST)
 #define blk_rq_started(rq)	((rq)->flags & REQ_STARTED)
+#define blk_rq_sync(rq)		((rq)->flags & REQ_SYNCHRONOUS)
 
 #define blk_account_rq(rq)	(blk_rq_started(rq) && blk_fs_request(rq))
 
diff -urP -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.10-rc2-mm4/include/linux/sched.h linux-2.6.10-rc2-mm4/include/linux/sched.h
--- /opt/kernel/linux-2.6.10-rc2-mm4/include/linux/sched.h	2004-12-06 16:07:55.000000000 +0100
+++ linux-2.6.10-rc2-mm4/include/linux/sched.h	2004-12-06 16:11:27.000000000 +0100
@@ -724,8 +724,7 @@
 #define PF_KSWAPD	0x00040000	/* I am kswapd */
 #define PF_SWAPOFF	0x00080000	/* I am in swapoff */
 #define PF_LESS_THROTTLE 0x00100000	/* Throttle me less: I clean memory */
-#define PF_SYNCWRITE	0x00200000	/* I am doing a sync write */
-#define PF_BORROWED_MM	0x00400000	/* I am a kthread doing use_mm */
+#define PF_BORROWED_MM	0x00200000	/* I am a kthread doing use_mm */
 
 #ifdef CONFIG_SMP
 extern int set_cpus_allowed(task_t *p, cpumask_t new_mask);
@@ -753,6 +752,7 @@
 extern int task_nice(const task_t *p);
 extern int task_curr(const task_t *p);
 extern int idle_cpu(int cpu);
+extern unsigned long task_will_schedule_at(const task_t *p);
 
 void yield(void);
 
diff -urP -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.10-rc2-mm4/kernel/sched.c linux-2.6.10-rc2-mm4/kernel/sched.c
--- /opt/kernel/linux-2.6.10-rc2-mm4/kernel/sched.c	2004-12-06 16:08:12.000000000 +0100
+++ linux-2.6.10-rc2-mm4/kernel/sched.c	2004-12-06 16:11:27.000000000 +0100
@@ -580,7 +580,7 @@
 static void dequeue_task(struct task_struct *p, prio_array_t *array)
 {
 	array->nr_active--;
-	list_del(&p->run_list);
+	list_del_init(&p->run_list);
 	if (list_empty(array->queue + p->prio))
 		__clear_bit(p->prio, array->bitmap);
 }
@@ -823,6 +823,41 @@
 	return cpu_curr(task_cpu(p)) == p;
 }
 
+/**
+ * task_will_schedule_at - in how many ticks will the task run, most likely
+ * @p: the task in question
+ */
+unsigned long task_will_schedule_at(const task_t *p)
+{
+	/*
+	 * Task is executing right now
+	 */
+	if (task_curr(p))
+		return 0;
+
+	/*
+	 * Task is not executing but on a runqueue - try to guess
+	 * how much time it will take for it to run again, but using
+	 * the current task's remaining ticks. This is not accurate,
+	 * but a good guess. (We use the min() to avoid the small race
+	 * that is due to us dereferencing the current task without
+	 * locking)
+	 */
+	if (p->array)
+		return min(cpu_curr(task_cpu(p))->time_slice,
+					(unsigned int)MAX_SLEEP_AVG);
+
+	/*
+	 * for blocked tasks, return half of the average sleep time.
+	 * (because this is the average sleep-time we'll see if we
+	 * sample the period randomly.)
+	 */
+	return NS_TO_JIFFIES(p->sleep_avg) / 2;
+}
+
+EXPORT_SYMBOL_GPL(task_will_schedule_at);
+
+
 #ifdef CONFIG_SMP
 enum request_type {
 	REQ_MOVE_TASK,



-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06 12:21                 ` Kyle Moffett
@ 2004-12-06 16:42                   ` Robert Love
  2004-12-06 17:42                     ` P
  0 siblings, 1 reply; 30+ messages in thread
From: Robert Love @ 2004-12-06 16:42 UTC (permalink / raw)
  To: Kyle Moffett; +Cc: Jeff Sipek, Linux Kernel, Jens Axboe, Con Kolivas


> The reason I proposed my ideas for tying the two values together is 
> that I am
> concerned about breaking existing code.  

Nothing should break.

If apps don't explicitly set their i/o priority, then they get the
default.  Not a big deal.

This allows the default case to be the same as today.

	Robert Love



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #2
  2004-12-06 16:42                   ` Robert Love
@ 2004-12-06 17:42                     ` P
  0 siblings, 0 replies; 30+ messages in thread
From: P @ 2004-12-06 17:42 UTC (permalink / raw)
  To: Robert Love
  Cc: Kyle Moffett, Jeff Sipek, Linux Kernel, Jens Axboe, Con Kolivas

Robert Love wrote:
>>The reason I proposed my ideas for tying the two values together is 
>>that I am
>>concerned about breaking existing code.  
> 
> 
> Nothing should break.
> 
> If apps don't explicitly set their i/o priority, then they get the
> default.  Not a big deal.
> 
> This allows the default case to be the same as today.
> 
> 	Robert Love

For reference, this was discussed last year:
http://marc.theaimsgroup.com/?l=linux-kernel&m=106847268508985&w=2

-- 
Pádraig Brady - http://www.pixelbeat.org
--

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] Time sliced CFQ #3
  2004-12-06 13:27       ` [PATCH] Time sliced CFQ #3 Jens Axboe
  2004-12-06 14:01         ` Søren Lott
  2004-12-06 15:07         ` Prakash K. Cheemplavam
@ 2004-12-06 23:30         ` Ed Tomlinson
  2 siblings, 0 replies; 30+ messages in thread
From: Ed Tomlinson @ 2004-12-06 23:30 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Linux Kernel

Jens,

This works here.  Hope you keep patching against mainline.  I've 
decided to track it till 2.6.10 comes out...

Thanks,

Ed

On Monday 06 December 2004 08:27, Jens Axboe wrote:
> On Mon, Dec 06 2004, Prakash K. Cheemplavam wrote:
> > Jens Axboe schrieb:
> > >On Mon, Dec 06 2004, Prakash K. Cheemplavam wrote:
> > >
> > >>Hi,
> > >>
> > >>this one crapped out on me, while having heavy disk activity. (updating 
> > >>gentoo portage tree - rebuilding metadata of it). Unfortunately I 
> > >>couldn't save the oops, as I had no hd access anymore and X would freeze 
> > >>a little later...(and I don't want to risk my data a second time...)
> > >
> > >
> > >Did you save anything at all? Just the function of the EIP would be
> > >better than nothing.
> > 
> > Nope, sorry. I hoped it would be in the logs, but it seems as new cfq 
> > went havoc, hd access went dead. And I was a bit too nervous about my 
> > data so that I didn't write it down by hand...
> 
> It is really rare for the io scheduler to cause serious data screwups,
> thankfully. Often what will happen is that it will crash, but with
> everything written fine up to that point. So it's similar to a power
> loss, but the drive should get its cache out on its own.
> 
> > >Well hard to say anything qualified without an oops :/
> > >
> > >I'll try with PREEMPT here.
> > 
> > If you are not able to reproduce, I will try it again on a spare 
> > partition... Should access to zip drive still be possible if hd's 
> > io-scheduler is dead?
> 
> Depends on where it died, really. But the chances are probably slim.
> 
> If you feel like giving it another go, I've uploaded a new patch here:
> 
> http://www.kernel.org/pub/linux/kernel/people/axboe/patches/v2.6/2.6.10-rc3/cfq-time-slices-6.gz
> 
> Changes:
> 
> - Increase async_rq slice significantly (from 8 to 128)
> 
> - Fix accounting bug that prevented non-fs requests from working
>   correctly. Things like cdrecord and cdda rippers would hang.
> 
> - Add logic to check whether a given process is potentially runnable or
>   not. We don't arm the slice idle timer if the process has exited or is
>   not either running or about to be running.
> 
> - TCQ fix: don't idle drive until last request comes in.
> 
> - Fix a stall with exiting task holding the active queue. This should
>   fix Helges problems, I hope.
> 
> - Restore ->nr_requests on io scheduler switch
> 
> - Kill ->pid from io_context, this seems to have been added with 'as'
>   but never used by anyone.
> 
> 

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2004-12-06 23:31 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-12-04 10:49 [PATCH] Time sliced CFQ #2 Jens Axboe
2004-12-04 16:39 ` Jeff Sipek
2004-12-05 18:58   ` Jens Axboe
2004-12-06  0:29     ` Jeff Sipek
2004-12-06  1:59       ` Con Kolivas
2004-12-06  2:23         ` Jeff Sipek
2004-12-06  2:34           ` Con Kolivas
2004-12-06  5:00             ` Kyle Moffett
2004-12-06  5:14               ` Robert Love
2004-12-06  7:19                 ` Jens Axboe
2004-12-06 12:18                   ` Helge Hafting
2004-12-06 12:24                     ` Jens Axboe
2004-12-06 12:21                 ` Kyle Moffett
2004-12-06 16:42                   ` Robert Love
2004-12-06 17:42                     ` P
2004-12-06  7:15               ` Jens Axboe
2004-12-06  7:13       ` Jens Axboe
2004-12-05 14:21 ` Ed Tomlinson
2004-12-05 15:18   ` Jens Axboe
2004-12-05 17:58     ` Ed Tomlinson
2004-12-06  9:31 ` Prakash K. Cheemplavam
2004-12-06  9:35   ` Jens Axboe
2004-12-06 11:48     ` Ed Tomlinson
2004-12-06 12:31     ` Prakash K. Cheemplavam
2004-12-06 13:27       ` [PATCH] Time sliced CFQ #3 Jens Axboe
2004-12-06 14:01         ` Søren Lott
2004-12-06 15:01           ` Jens Axboe
2004-12-06 15:45             ` Jens Axboe
2004-12-06 15:07         ` Prakash K. Cheemplavam
2004-12-06 23:30         ` Ed Tomlinson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).