From: Vivek Goyal <vgoyal@redhat.com>
To: linux-kernel@vger.kernel.org,
	containers@lists.linux-foundation.org, dm-devel@redhat.com,
	jens.axboe@oracle.com, nauman@google.com, dpshah@google.com,
	lizf@cn.fujitsu.com, mikew@google.com, fchecconi@gmail.com,
	paolo.valente@unimore.it, ryov@valinux.co.jp,
	fernando@oss.ntt.co.jp, s-uchida@ap.jp.nec.com,
	taka@valinux.co.jp, guijianfeng@cn.fujitsu.com,
	jmoyer@redhat.com, dhaval@linux.vnet.ibm.com,
	balbir@linux.vnet.ibm.com, righi.andrea@gmail.com,
	m-ikeda@ds.jp.nec.com, jbaron@redhat.com
Cc: agk@redhat.com, snitzer@redhat.com, vgoyal@redhat.com,
	akpm@linux-foundation.org, peterz@infradead.org
Subject: [PATCH 18/25] io-controller: anticipatory changes for hierarchical fair queuing
Date: Thu,  2 Jul 2009 16:01:50 -0400	[thread overview]
Message-ID: <1246564917-19603-19-git-send-email-vgoyal@redhat.com> (raw)
In-Reply-To: <1246564917-19603-1-git-send-email-vgoyal@redhat.com>

This patch changes the anticipatory scheduler (AS) to use the queue scheduling
code from the elevator layer. One can go back to the old AS by deselecting
CONFIG_IOSCHED_AS_HIER. Even with CONFIG_IOSCHED_AS_HIER=y, as long as no
other cgroup is created, AS behavior should remain the same as before.

o AS is a single-queue iosched, which means there is one AS queue per group.

o The common layer code selects the queue to dispatch from based on fairness,
  and then the AS code selects the request within the group.

o AS runs read and write batches within a group. So the common layer runs
  timed group queues, and within the group's time, AS runs timed batches of
  reads and writes.

o Note: Previously the AS write batch length was adjusted dynamically whenever
  a W->R batch data direction switch took place and the first request from the
  read batch completed.

  Now the write batch update takes place when the last request from the write
  batch has finished, during the W->R transition.

o AS runs its own anticipation logic to anticipate on reads. The common layer
  also anticipates on the group if the think time of the group is within
  slice_idle (a sketch of the resulting queue-expiry handshake follows this
  list).

o Introduced a few debugging (blktrace) messages in AS.
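
Illustration (not part of the patch): a minimal, self-contained userspace C
sketch of the expiry handshake described above. All names here (the toy_*
types and functions) are hypothetical and only mirror the contract between
elv_iosched_expire_ioq() and as_expire_ioq() below: the common fair queuing
layer asks the iosched whether the active queue may be expired; AS refuses
while requests from the current batch are still in flight or it is
anticipating, and in that case the common layer marks the queue must_expire
and retries on the next request completion.

  #include <stdio.h>
  #include <stdbool.h>

  struct toy_as_data {
  	int nr_dispatched;	/* batch requests sent to drive, not completed */
  	bool anticipating;	/* AS is waiting for a close read */
  	bool switch_queue;	/* elevator asked for a switch we had to defer */
  };

  struct toy_ioq {
  	bool must_expire;	/* common-layer flag: expire at next opportunity */
  };

  /* AS side: may we give up the queue now? */
  static bool toy_as_expire_ioq(struct toy_as_data *ad, bool force)
  {
  	if (force) {			/* forced drain: no choice */
  		ad->anticipating = false;
  		ad->switch_queue = false;
  		return true;
  	}
  	if (ad->nr_dispatched || ad->anticipating) {
  		ad->switch_queue = true;	/* remember it, deny for now */
  		return false;
  	}
  	ad->switch_queue = false;
  	return true;
  }

  /* Common-layer side: try to expire, otherwise mark for later */
  static bool toy_elv_expire(struct toy_ioq *ioq, struct toy_as_data *ad,
  			   bool force)
  {
  	if (toy_as_expire_ioq(ad, force))
  		return true;		/* switch to the next group's queue */
  	ioq->must_expire = true;	/* denied; retry on next completion */
  	return false;
  }

  int main(void)
  {
  	struct toy_as_data ad = { .nr_dispatched = 2 };
  	struct toy_ioq ioq = { .must_expire = false };

  	/* Slice expired but the current batch still has requests in flight. */
  	printf("expire now? %d (must_expire=%d)\n",
  	       (int)toy_elv_expire(&ioq, &ad, false), (int)ioq.must_expire);

  	/* Last request of the batch completes; AS now agrees to the switch. */
  	ad.nr_dispatched = 0;
  	printf("expire now? %d\n", (int)toy_elv_expire(&ioq, &ad, false));
  	return 0;
  }

In the actual patch, when the switch is granted AS additionally saves the
per-queue batch context (current_batch_time_left, saved_batch_data_dir) so
the batch can resume when the queue is scheduled in again.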

Signed-off-by: Nauman Rafique <nauman@google.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
 block/Kconfig.iosched    |   12 ++
 block/as-iosched.c       |  280 +++++++++++++++++++++++++++++++++++++++++++++-
 block/elevator-fq.c      |   93 +++++++++++++--
 block/elevator-fq.h      |    3 +
 include/linux/elevator.h |    2 +
 5 files changed, 372 insertions(+), 18 deletions(-)

diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 3a9e7d7..77fc786 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -45,6 +45,18 @@ config IOSCHED_AS
 	  deadline I/O scheduler, it can also be slower in some cases
 	  especially some database loads.
 
+config IOSCHED_AS_HIER
+	bool "Anticipatory Hierarchical Scheduling support"
+	depends on IOSCHED_AS && CGROUPS
+	select ELV_FAIR_QUEUING
+	select GROUP_IOSCHED
+	default n
+	---help---
+	  Enable hierarchical scheduling in anticipatory. In this mode
+	  anticipatory keeps one IO queue per cgroup instead of a global
+	  queue. Elevator fair queuing logic ensures fairness among various
+	  queues.
+
 config IOSCHED_DEADLINE
 	tristate "Deadline I/O scheduler"
 	default y
diff --git a/block/as-iosched.c b/block/as-iosched.c
index e3514eb..18a61bb 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -16,6 +16,7 @@
 #include <linux/compiler.h>
 #include <linux/rbtree.h>
 #include <linux/interrupt.h>
+#include <linux/blktrace_api.h>
 
 /*
  * See Documentation/block/as-iosched.txt
@@ -84,10 +85,24 @@ struct as_queue {
 	struct list_head fifo_list[2];
 
 	struct request *next_rq[2];	/* next in sort order */
+
+	/*
+	 * If an as_queue is switched while a batch is running, then we
+	 * store the time left before current batch will expire
+	 */
+	long current_batch_time_left;
+
+	/*
+	 * batch data dir when queue was scheduled out. This will be used
+	 * to setup ad->batch_data_dir when queue is scheduled in.
+	 */
+	int saved_batch_data_dir;
+
 	unsigned long last_check_fifo[2];
 	int write_batch_count;		/* max # of reqs in a write batch */
 	int current_write_count;	/* how many requests left this batch */
 	int write_batch_idled;		/* has the write batch gone idle? */
+	int nr_queued[2];
 };
 
 struct as_data {
@@ -123,6 +138,9 @@ struct as_data {
 	unsigned long fifo_expire[2];
 	unsigned long batch_expire[2];
 	unsigned long antic_expire;
+
+	/* elevator requested a queue switch. */
+	int switch_queue;
 };
 
 /*
@@ -144,12 +162,174 @@ enum arq_state {
 #define RQ_STATE(rq)	((enum arq_state)(rq)->elevator_private2)
 #define RQ_SET_STATE(rq, state)	((rq)->elevator_private2 = (void *) state)
 
+#define as_log(ad, fmt, args...)        \
+	blk_add_trace_msg((ad)->q, "as " fmt, ##args)
+
 static DEFINE_PER_CPU(unsigned long, ioc_count);
 static struct completion *ioc_gone;
 static DEFINE_SPINLOCK(ioc_gone_lock);
 
 static void as_move_to_dispatch(struct as_data *ad, struct request *rq);
 static void as_antic_stop(struct as_data *ad);
+static inline int as_batch_expired(struct as_data *ad, struct as_queue *asq);
+
+#ifdef CONFIG_IOSCHED_AS_HIER
+static void as_save_batch_context(struct as_data *ad, struct as_queue *asq)
+{
+	/* Save batch data dir */
+	asq->saved_batch_data_dir = ad->batch_data_dir;
+
+	if (ad->changed_batch) {
+		/*
+		 * In case of force expire, we come here. Batch changeover
+		 * has been signalled but we are waiting for all the
+		 * request to finish from previous batch and then start
+		 * the new batch. Can't wait now. Mark that full batch time
+		 * needs to be allocated when this queue is scheduled again.
+		 */
+		asq->current_batch_time_left =
+				ad->batch_expire[ad->batch_data_dir];
+		ad->changed_batch = 0;
+		goto out;
+	}
+
+	if (ad->new_batch) {
+		/*
+		 * We should come here only when new_batch has been set
+		 * but no read request has been issued or if it is a forced
+		 * expiry.
+		 *
+		 * In both the cases, new batch has not started yet so
+		 * allocate full batch length for next scheduling opportunity.
+		 * We don't do write batch size adjustment in hierarchical
+		 * AS so that should not be an issue.
+		 */
+		asq->current_batch_time_left =
+				ad->batch_expire[ad->batch_data_dir];
+		ad->new_batch = 0;
+		goto out;
+	}
+
+	/* Save how much time is left before current batch expires */
+	if (as_batch_expired(ad, asq))
+		asq->current_batch_time_left = 0;
+	else {
+		asq->current_batch_time_left = ad->current_batch_expires
+							- jiffies;
+		BUG_ON((asq->current_batch_time_left) < 0);
+	}
+
+	if (ad->io_context) {
+		put_io_context(ad->io_context);
+		ad->io_context = NULL;
+	}
+
+out:
+	as_log(ad, "save batch: dir=%c time_left=%d changed_batch=%d"
+			" new_batch=%d, antic_status=%d",
+			ad->batch_data_dir ? 'R' : 'W',
+			asq->current_batch_time_left,
+			ad->changed_batch, ad->new_batch, ad->antic_status);
+	return;
+}
+
+/*
+ * FIXME: In the original AS, a read batch's time accounting started only
+ * after the first request had completed (if the last batch was a write
+ * batch). But here we might be rescheduling a read batch right away,
+ * irrespective of the disk cache state.
+ */
+static void as_restore_batch_context(struct as_data *ad, struct as_queue *asq)
+{
+	/* Adjust the batch expire time */
+	if (asq->current_batch_time_left)
+		ad->current_batch_expires = jiffies +
+						asq->current_batch_time_left;
+	/* restore asq batch_data_dir info */
+	ad->batch_data_dir = asq->saved_batch_data_dir;
+	as_log(ad, "restore batch: dir=%c time=%d reads_q=%d writes_q=%d"
+			" ad->antic_status=%d",
+			ad->batch_data_dir ? 'R' : 'W',
+			asq->current_batch_time_left,
+			asq->nr_queued[1], asq->nr_queued[0],
+			ad->antic_status);
+}
+
+/* ioq has been set. */
+static void as_active_ioq_set(struct request_queue *q, void *sched_queue,
+				int coop)
+{
+	struct as_queue *asq = sched_queue;
+	struct as_data *ad = q->elevator->elevator_data;
+
+	as_restore_batch_context(ad, asq);
+}
+
+/*
+ * This is a notification from common layer that it wishes to expire this
+ * io queue. AS decides whether queue can be expired, if yes, it also
+ * saves the batch context.
+ */
+static int as_expire_ioq(struct request_queue *q, void *sched_queue,
+				int slice_expired, int force)
+{
+	struct as_data *ad = q->elevator->elevator_data;
+	int status = ad->antic_status;
+	struct as_queue *asq = sched_queue;
+
+	as_log(ad, "as_expire_ioq slice_expired=%d, force=%d", slice_expired,
+		force);
+
+	/* Forced expiry. We don't have a choice */
+	if (force) {
+		as_antic_stop(ad);
+		/*
+		 * antic_stop() sets antic_status to FINISHED which signifies
+		 * that either we timed out or we found a close request but
+		 * that's not the case here. Start from scratch.
+		 */
+		ad->antic_status = ANTIC_OFF;
+		as_save_batch_context(ad, asq);
+		ad->switch_queue = 0;
+		return 1;
+	}
+
+	/*
+	 * We are waiting for requests to finish from last
+	 * batch. Don't expire the queue now
+	 */
+	if (ad->changed_batch)
+		goto keep_queue;
+
+	/*
+	 * Wait for all requests from existing batch to finish before we
+	 * switch the queue. New queue might change the batch direction
+	 * and this is to be consistent with AS philosophy of not dispatching
+	 * new requests to the underlying drive till requests from the
+	 * previous batch are completed.
+	 */
+	if (ad->nr_dispatched)
+		goto keep_queue;
+
+	/*
+	 * If AS anticipation is ON, wait for it to finish.
+	 */
+	BUG_ON(status == ANTIC_WAIT_REQ);
+
+	if (status == ANTIC_WAIT_NEXT)
+		goto keep_queue;
+
+	/* We are good to expire the queue. Save batch context */
+	as_save_batch_context(ad, asq);
+	ad->switch_queue = 0;
+	return 1;
+
+keep_queue:
+	/* Mark that elevator requested for queue switch whenever possible */
+	ad->switch_queue = 1;
+	return 0;
+}
+#endif
 
 /*
  * IO Context helper functions
@@ -429,6 +609,7 @@ static void as_antic_waitnext(struct as_data *ad)
 	mod_timer(&ad->antic_timer, timeout);
 
 	ad->antic_status = ANTIC_WAIT_NEXT;
+	as_log(ad, "antic_waitnext set");
 }
 
 /*
@@ -442,8 +623,10 @@ static void as_antic_waitreq(struct as_data *ad)
 	if (ad->antic_status == ANTIC_OFF) {
 		if (!ad->io_context || ad->ioc_finished)
 			as_antic_waitnext(ad);
-		else
+		else {
 			ad->antic_status = ANTIC_WAIT_REQ;
+			as_log(ad, "antic_waitreq set");
+		}
 	}
 }
 
@@ -455,6 +638,8 @@ static void as_antic_stop(struct as_data *ad)
 {
 	int status = ad->antic_status;
 
+	as_log(ad, "as_antic_stop antic_status=%d", ad->antic_status);
+
 	if (status == ANTIC_WAIT_REQ || status == ANTIC_WAIT_NEXT) {
 		if (status == ANTIC_WAIT_NEXT)
 			del_timer(&ad->antic_timer);
@@ -474,6 +659,7 @@ static void as_antic_timeout(unsigned long data)
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
+	as_log(ad, "as_antic_timeout");
 	if (ad->antic_status == ANTIC_WAIT_REQ
 			|| ad->antic_status == ANTIC_WAIT_NEXT) {
 		struct as_io_context *aic;
@@ -652,6 +838,21 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
 	struct io_context *ioc;
 	struct as_io_context *aic;
 
+#ifdef CONFIG_IOSCHED_AS_HIER
+	/*
+	 * If the active asq and rq's asq are not same, then one can not
+	 * break the anticipation. This primarily becomes useful when a
+	 * request is added to a queue which is not being served currently.
+	 */
+	if (rq) {
+		struct as_queue *asq = elv_get_sched_queue(ad->q, rq);
+		struct as_queue *curr_asq =
+				elv_active_sched_queue(ad->q->elevator);
+
+		if (asq != curr_asq)
+			return 0;
+	}
+#endif
 	ioc = ad->io_context;
 	BUG_ON(!ioc);
 	spin_lock(&ioc->lock);
@@ -810,16 +1011,20 @@ static void as_update_rq(struct as_data *ad, struct request *rq)
 /*
  * Gathers timings and resizes the write batch automatically
  */
-static void update_write_batch(struct as_data *ad)
+static void update_write_batch(struct as_data *ad, struct request *rq)
 {
 	unsigned long batch = ad->batch_expire[BLK_RW_ASYNC];
 	long write_time;
-	struct as_queue *asq = elv_get_sched_queue(ad->q, NULL);
+	struct as_queue *asq = elv_get_sched_queue(ad->q, rq);
 
 	write_time = (jiffies - ad->current_batch_expires) + batch;
 	if (write_time < 0)
 		write_time = 0;
 
+	as_log(ad, "upd write: write_time=%d batch=%d write_batch_idled=%d"
+			" current_write_count=%d", write_time, batch,
+			asq->write_batch_idled, asq->current_write_count);
+
 	if (write_time > batch && !asq->write_batch_idled) {
 		if (write_time > batch * 3)
 			asq->write_batch_count /= 2;
@@ -834,6 +1039,8 @@ static void update_write_batch(struct as_data *ad)
 
 	if (asq->write_batch_count < 1)
 		asq->write_batch_count = 1;
+
+	as_log(ad, "upd write count=%d", asq->write_batch_count);
 }
 
 /*
@@ -843,6 +1050,7 @@ static void update_write_batch(struct as_data *ad)
 static void as_completed_request(struct request_queue *q, struct request *rq)
 {
 	struct as_data *ad = q->elevator->elevator_data;
+	struct as_queue *asq = elv_get_sched_queue(q, rq);
 
 	WARN_ON(!list_empty(&rq->queuelist));
 
@@ -851,7 +1059,24 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
 		goto out;
 	}
 
+	as_log(ad, "complete: reads_q=%d writes_q=%d changed_batch=%d"
+		" new_batch=%d switch_queue=%d, dir=%c",
+		asq->nr_queued[1], asq->nr_queued[0], ad->changed_batch,
+		ad->new_batch, ad->switch_queue,
+		ad->batch_data_dir ? 'R' : 'W');
+
 	if (ad->changed_batch && ad->nr_dispatched == 1) {
+		/*
+		 * If this was write batch finishing, adjust the write batch
+		 * length.
+		 *
+		 * Note, write batch length is being calculated upon completion
+		 * of last write request finished and not completion of first
+		 * read request finished in the next batch.
+		 */
+		if (ad->batch_data_dir == BLK_RW_SYNC)
+			update_write_batch(ad, rq);
+
 		ad->current_batch_expires = jiffies +
 					ad->batch_expire[ad->batch_data_dir];
 		kblockd_schedule_work(q, &ad->antic_work);
@@ -869,7 +1094,6 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
 	 * and writeback caches
 	 */
 	if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) {
-		update_write_batch(ad);
 		ad->current_batch_expires = jiffies +
 				ad->batch_expire[BLK_RW_SYNC];
 		ad->new_batch = 0;
@@ -888,6 +1112,13 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
 	}
 
 	as_put_io_context(rq);
+
+	/*
+	 * If elevator requested a queue switch, kick the queue in the
+	 * hope that this is right time for switch.
+	 */
+	if (ad->switch_queue)
+		kblockd_schedule_work(q, &ad->antic_work);
 out:
 	RQ_SET_STATE(rq, AS_RQ_POSTSCHED);
 }
@@ -908,6 +1139,9 @@ static void as_remove_queued_request(struct request_queue *q,
 
 	WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED);
 
+	BUG_ON(asq->nr_queued[data_dir] <= 0);
+	asq->nr_queued[data_dir]--;
+
 	ioc = RQ_IOC(rq);
 	if (ioc && ioc->aic) {
 		BUG_ON(!atomic_read(&ioc->aic->nr_queued));
@@ -1019,6 +1253,8 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
 	if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
 		atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched);
 	ad->nr_dispatched++;
+	as_log(ad, "dispatch req dir=%c nr_dispatched = %d",
+			data_dir ? 'R' : 'W', ad->nr_dispatched);
 }
 
 /*
@@ -1066,6 +1302,7 @@ static int as_dispatch_request(struct request_queue *q, int force)
 		}
 		asq->last_check_fifo[BLK_RW_ASYNC] = jiffies;
 
+		as_log(ad, "forced dispatch");
 		return dispatched;
 	}
 
@@ -1078,8 +1315,14 @@ static int as_dispatch_request(struct request_queue *q, int force)
 	if (!(reads || writes)
 		|| ad->antic_status == ANTIC_WAIT_REQ
 		|| ad->antic_status == ANTIC_WAIT_NEXT
-		|| ad->changed_batch)
+		|| ad->changed_batch) {
+		as_log(ad, "no dispatch. read_q=%d, writes_q=%d"
+			" ad->antic_status=%d, changed_batch=%d,"
+			" switch_queue=%d new_batch=%d", asq->nr_queued[1],
+			asq->nr_queued[0], ad->antic_status, ad->changed_batch,
+			ad->switch_queue, ad->new_batch);
 		return 0;
+	}
 
 	if (!(reads && writes && as_batch_expired(ad, asq))) {
 		/*
@@ -1092,6 +1335,7 @@ static int as_dispatch_request(struct request_queue *q, int force)
 				goto fifo_expired;
 
 			if (as_can_anticipate(ad, rq)) {
+				as_log(ad, "can_anticipate = 1");
 				as_antic_waitreq(ad);
 				return 0;
 			}
@@ -1111,6 +1355,8 @@ static int as_dispatch_request(struct request_queue *q, int force)
 	 * data direction (read / write)
 	 */
 
+	as_log(ad, "select a fresh batch and request");
+
 	if (reads) {
 		BUG_ON(RB_EMPTY_ROOT(&asq->sort_list[BLK_RW_SYNC]));
 
@@ -1125,6 +1371,7 @@ static int as_dispatch_request(struct request_queue *q, int force)
 			ad->changed_batch = 1;
 		}
 		ad->batch_data_dir = BLK_RW_SYNC;
+		as_log(ad, "new batch dir is sync");
 		rq = rq_entry_fifo(asq->fifo_list[BLK_RW_SYNC].next);
 		asq->last_check_fifo[ad->batch_data_dir] = jiffies;
 		goto dispatch_request;
@@ -1149,6 +1396,7 @@ dispatch_writes:
 			ad->new_batch = 0;
 		}
 		ad->batch_data_dir = BLK_RW_ASYNC;
+		as_log(ad, "new batch dir is async");
 		asq->current_write_count = asq->write_batch_count;
 		asq->write_batch_idled = 0;
 		rq = rq_entry_fifo(asq->fifo_list[BLK_RW_ASYNC].next);
@@ -1184,6 +1432,9 @@ fifo_expired:
 		ad->changed_batch = 0;
 	}
 
+	if (ad->switch_queue)
+		return 0;
+
 	/*
 	 * rq is the selected appropriate request.
 	 */
@@ -1207,6 +1458,11 @@ static void as_add_request(struct request_queue *q, struct request *rq)
 
 	rq->elevator_private = as_get_io_context(q->node);
 
+	asq->nr_queued[data_dir]++;
+	as_log(ad, "add a %c request read_q=%d write_q=%d",
+			data_dir ? 'R' : 'W', asq->nr_queued[1],
+			asq->nr_queued[0]);
+
 	if (RQ_IOC(rq)) {
 		as_update_iohist(ad, RQ_IOC(rq)->aic, rq);
 		atomic_inc(&RQ_IOC(rq)->aic->nr_queued);
@@ -1408,6 +1664,7 @@ static void *as_init_queue(struct request_queue *q)
 	ad->batch_expire[BLK_RW_ASYNC] = default_write_batch_expire;
 
 	ad->current_batch_expires = jiffies + ad->batch_expire[BLK_RW_SYNC];
+	ad->switch_queue = 0;
 
 	return ad;
 }
@@ -1493,6 +1750,11 @@ static struct elv_fs_entry as_attrs[] = {
 	AS_ATTR(antic_expire),
 	AS_ATTR(read_batch_expire),
 	AS_ATTR(write_batch_expire),
+#ifdef CONFIG_IOSCHED_AS_HIER
+	ELV_ATTR(fairness),
+	ELV_ATTR(slice_idle),
+	ELV_ATTR(slice_sync),
+#endif
 	__ATTR_NULL
 };
 
@@ -1514,8 +1776,14 @@ static struct elevator_type iosched_as = {
 		.trim =				as_trim,
 		.elevator_alloc_sched_queue_fn = as_alloc_as_queue,
 		.elevator_free_sched_queue_fn = as_free_as_queue,
+#ifdef CONFIG_IOSCHED_AS_HIER
+		.elevator_expire_ioq_fn =       as_expire_ioq,
+		.elevator_active_ioq_set_fn =   as_active_ioq_set,
 	},
-
+	.elevator_features = ELV_IOSCHED_NEED_FQ | ELV_IOSCHED_SINGLE_IOQ,
+#else
+	},
+#endif
 	.elevator_attrs = as_attrs,
 	.elevator_name = "anticipatory",
 	.elevator_owner = THIS_MODULE,
diff --git a/block/elevator-fq.c b/block/elevator-fq.c
index 25fdac6..c1d04b1 100644
--- a/block/elevator-fq.c
+++ b/block/elevator-fq.c
@@ -2523,6 +2523,7 @@ static void __elv_set_active_ioq(struct elv_fq_data *efqd, struct io_queue *ioq,
 		elv_clear_ioq_must_dispatch(ioq);
 		elv_clear_ioq_wait_busy_done(ioq);
 		elv_mark_ioq_slice_new(ioq);
+		elv_clear_ioq_must_expire(ioq);
 
 		del_timer(&efqd->idle_slice_timer);
 	}
@@ -2646,6 +2647,49 @@ static inline unsigned long elv_disk_time_used(struct request_queue *q,
 }
 
 /*
+ * Call the iosched to let it know that the elevator wants to expire the queue.
+ * This gives an iosched like AS a chance to say no (if it is in the middle of
+ * a batch changeover or anticipating), and lets the iosched do housekeeping.
+ *
+ * force--> this is a forced dispatch and the iosched must clean up its state.
+ * 	     This is useful when the elevator wants to drain the iosched and
+ * 	     expire the current active queue.
+ *
+ * slice_expired--> if 1, the ioq slice has expired, hence the elevator fair
+ * 		    queuing logic wants to switch the queue. The iosched should
+ * 		    allow that unless it really cannot. Currently AS can deny
+ * 		    the switch if it is in the middle of a batch switch.
+ *
+ * 		    if 0, the time slice is still remaining. It is up to the
+ * 		    iosched whether it wants to wait on this queue or just
+ * 		    expire it and move on to the next queue.
+ *
+ */
+static int elv_iosched_expire_ioq(struct request_queue *q, int slice_expired,
+					int force)
+{
+	struct elevator_queue *e = q->elevator;
+	struct io_queue *ioq = elv_active_ioq(q->elevator);
+	int ret = 1;
+
+	if (e->ops->elevator_expire_ioq_fn) {
+		ret = e->ops->elevator_expire_ioq_fn(q, ioq->sched_queue,
+							slice_expired, force);
+		/*
+		 * AS denied expiration of queue right now. Mark that elevator
+		 * layer has requested ioscheduler (as) to expire this queue.
+		 * Now as will try to expire this queue as soon as it can.
+		 * Now don't try to dispatch from this queue even if we get
+		 * a new request and if time slice is left. Do expire it once.
+		 */
+		if (!ret)
+			elv_mark_ioq_must_expire(ioq);
+	}
+
+	return ret;
+}
+
+/*
  * Do the accounting. Determine how much service (in terms of time slices)
  * current queue used and adjust the start, finish time of queue and vtime
  * of the tree accordingly.
@@ -2683,6 +2727,7 @@ void __elv_ioq_slice_expired(struct request_queue *q, struct io_queue *ioq)
 	elv_clear_ioq_wait_request(ioq);
 	elv_clear_ioq_wait_busy(ioq);
 	elv_clear_ioq_wait_busy_done(ioq);
+	elv_clear_ioq_must_expire(ioq);
 
 	/*
 	 * if ioq->slice_end = 0, that means a queue was expired before first
@@ -2821,16 +2866,18 @@ static int elv_should_preempt(struct request_queue *q, struct io_queue *new_ioq,
 static void elv_preempt_queue(struct request_queue *q, struct io_queue *ioq)
 {
 	elv_log_ioq(&q->elevator->efqd, ioq, "preempt");
-	elv_ioq_slice_expired(q);
+	if (elv_iosched_expire_ioq(q, 0, 1)) {
+		elv_ioq_slice_expired(q);
 
-	/*
-	 * Put the new queue at the front of the of the current list,
-	 * so we know that it will be selected next.
-	 */
+		/*
+		 * Put the new queue at the front of the of the current list,
+		 * so we know that it will be selected next.
+		 */
 
-	elv_activate_ioq(ioq, 1);
-	ioq->slice_end = 0;
-	elv_mark_ioq_slice_new(ioq);
+		elv_activate_ioq(ioq, 1);
+		ioq->slice_end = 0;
+		elv_mark_ioq_slice_new(ioq);
+	}
 }
 
 void elv_ioq_request_add(struct request_queue *q, struct request *rq)
@@ -3035,6 +3082,8 @@ void *elv_fq_select_ioq(struct request_queue *q, int force)
 	struct elv_fq_data *efqd = &q->elevator->efqd;
 	struct io_queue *new_ioq = NULL, *ioq = elv_active_ioq(q->elevator);
 	struct io_group *iog;
+	struct elevator_type *e = q->elevator->elevator_type;
+	int slice_expired = 1;
 
 	if (!elv_nr_busy_ioq(q->elevator))
 		return NULL;
@@ -3053,6 +3102,10 @@ void *elv_fq_select_ioq(struct request_queue *q, int force)
 			goto expire;
 	}
 
+	/* This queue has been marked for expiry. Try to expire it */
+	if (elv_ioq_must_expire(ioq))
+		goto expire;
+
 	/*
 	 * If there is only root group present, don't expire the queue for
 	 * single queue ioschedulers (noop, deadline, AS). It is unnecessary
@@ -3142,8 +3195,10 @@ void *elv_fq_select_ioq(struct request_queue *q, int force)
 		goto keep_queue;
 	}
 
+	slice_expired = 0;
 expire:
-	if (efqd->fairness >= 2 && !force && ioq && ioq->dispatched) {
+	if (efqd->fairness >= 2 && !force && ioq && ioq->dispatched
+	    && strcmp(e->elevator_name, "anticipatory")) {
 		/*
 		 * If there are request dispatched from this queue, don't
 		 * dispatch requests from new queue till all the requests from
@@ -3154,6 +3209,11 @@ expire:
 		 *
 		 * Set ioq = NULL so that no more requests are dispatched from
 		 * this queue.
+		 *
+		 * Note: Anticipatory already has the behavior where queue
+		 * switch is not allowed until requests from previous queue
+		 * have finished. Hence we don't have to get into this loop
+		 * in case of AS.
 		 */
 		elv_log_ioq(efqd, ioq, "select: wait for requests to finish"
 				" disp=%lu", ioq->dispatched);
@@ -3161,7 +3221,14 @@ expire:
 		goto keep_queue;
 	}
 
-	elv_ioq_slice_expired(q);
+	if (elv_iosched_expire_ioq(q, slice_expired, force))
+		elv_ioq_slice_expired(q);
+	else
+		/*
+		 * Not making ioq = NULL, as AS can deny queue expiration and
+		 * continue to dispatch from same queue
+		 */
+		goto keep_queue;
 new_queue:
 	ioq = elv_set_active_ioq(q, new_ioq);
 keep_queue:
@@ -3298,7 +3365,8 @@ void elv_ioq_completed_request(struct request_queue *q, struct request *rq)
 		}
 
 		if (elv_ioq_class_idle(ioq)) {
-			elv_ioq_slice_expired(q);
+			if (elv_iosched_expire_ioq(q, 1, 0))
+				elv_ioq_slice_expired(q);
 			goto done;
 		}
 
@@ -3343,7 +3411,8 @@ void elv_ioq_completed_request(struct request_queue *q, struct request *rq)
 					goto done;
 
 				/* Expire the queue */
-				elv_ioq_slice_expired(q);
+				if (elv_iosched_expire_ioq(q, 1, 0))
+					elv_ioq_slice_expired(q);
 			}
 		} else if (!ioq->nr_queued && !elv_close_cooperator(q, ioq, 1)
 			 && sync && (!rq_noidle(rq) || efqd->fairness))
diff --git a/block/elevator-fq.h b/block/elevator-fq.h
index baa6cee..c117d40 100644
--- a/block/elevator-fq.h
+++ b/block/elevator-fq.h
@@ -371,6 +371,8 @@ enum elv_queue_state_flags {
 	ELV_QUEUE_FLAG_slice_new,	  /* no requests dispatched in slice */
 	ELV_QUEUE_FLAG_wait_busy,	  /* wait for this queue to get busy */
 	ELV_QUEUE_FLAG_wait_busy_done,	  /* Have already waited on this queue*/
+	ELV_QUEUE_FLAG_must_expire,       /* Expire this queue even if it has
+					   * request and time slice left */
 };
 
 #define ELV_IO_QUEUE_FLAG_FNS(name)					\
@@ -395,6 +397,7 @@ ELV_IO_QUEUE_FLAG_FNS(idle_window)
 ELV_IO_QUEUE_FLAG_FNS(slice_new)
 ELV_IO_QUEUE_FLAG_FNS(wait_busy)
 ELV_IO_QUEUE_FLAG_FNS(wait_busy_done)
+ELV_IO_QUEUE_FLAG_FNS(must_expire)
 
 static inline struct io_service_tree *
 io_entity_service_tree(struct io_entity *entity)
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 6d2c8db..dda7951 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -42,6 +42,7 @@ typedef int (elevator_update_idle_window_fn) (struct elevator_queue*, void*,
 						struct request*);
 typedef struct io_queue* (elevator_close_cooperator_fn) (struct request_queue*,
 						void*, int probe);
+typedef int (elevator_expire_ioq_fn) (struct request_queue*, void *, int, int);
 #endif
 
 struct elevator_ops
@@ -81,6 +82,7 @@ struct elevator_ops
 	elevator_should_preempt_fn *elevator_should_preempt_fn;
 	elevator_update_idle_window_fn *elevator_update_idle_window_fn;
 	elevator_close_cooperator_fn *elevator_close_cooperator_fn;
+	elevator_expire_ioq_fn  *elevator_expire_ioq_fn;
 #endif
 };
 
-- 
1.6.0.6


  parent reply	other threads:[~2009-07-02 20:06 UTC|newest]

Thread overview: 191+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-07-02 20:01 [RFC] IO scheduler based IO controller V6 Vivek Goyal
2009-07-02 20:01 ` Vivek Goyal
2009-07-02 20:01 ` [PATCH 01/25] io-controller: Documentation Vivek Goyal
2009-07-02 20:01   ` Vivek Goyal
2009-07-02 20:01 ` [PATCH 02/25] io-controller: Core of the B-WF2Q+ scheduler Vivek Goyal
2009-07-02 20:01   ` Vivek Goyal
2009-07-02 20:01 ` [PATCH 03/25] io-controller: bfq support of in-class preemption Vivek Goyal
2009-07-02 20:01   ` Vivek Goyal
2009-07-02 20:01 ` [PATCH 04/25] io-controller: Common flat fair queuing code in elevaotor layer Vivek Goyal
2009-07-02 20:01   ` Vivek Goyal
2009-07-02 20:01 ` [PATCH 05/25] io-controller: Charge for time slice based on average disk rate Vivek Goyal
2009-07-02 20:01   ` Vivek Goyal
2009-07-02 20:01 ` [PATCH 06/25] io-controller: Modify cfq to make use of flat elevator fair queuing Vivek Goyal
2009-07-02 20:01   ` Vivek Goyal
2009-07-02 20:01 ` [PATCH 07/25] io-controller: core bfq scheduler changes for hierarchical setup Vivek Goyal
2009-07-02 20:01   ` Vivek Goyal
2009-07-02 20:01 ` [PATCH 08/25] io-controller: cgroup related changes for hierarchical group support Vivek Goyal
2009-07-02 20:01   ` Vivek Goyal
2009-07-02 20:01 ` [PATCH 09/25] io-controller: Common hierarchical fair queuing code in elevaotor layer Vivek Goyal
2009-07-02 20:01   ` Vivek Goyal
     [not found]   ` <1246564917-19603-10-git-send-email-vgoyal-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2009-07-06  2:46     ` Gui Jianfeng
2009-07-06  2:46   ` Gui Jianfeng
2009-07-06  2:46     ` Gui Jianfeng
2009-07-06 14:16     ` Vivek Goyal
2009-07-06 14:16       ` Vivek Goyal
     [not found]       ` <20090706141650.GD8279-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2009-07-07  1:40         ` [PATCH] io-controller: Get rid of css id from io cgroup Gui Jianfeng
2009-07-07  1:40           ` Gui Jianfeng
2009-07-08 14:04           ` Vivek Goyal
2009-07-08 14:04             ` Vivek Goyal
     [not found]           ` <4A52A77E.8050203-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2009-07-08 14:04             ` Vivek Goyal
     [not found]     ` <4A51657B.7000008-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2009-07-06 14:16       ` [PATCH 09/25] io-controller: Common hierarchical fair queuing code in elevaotor layer Vivek Goyal
2009-07-02 20:01 ` [PATCH 10/25] io-controller: cfq changes to use " Vivek Goyal
2009-07-02 20:01   ` Vivek Goyal
2009-07-02 20:01 ` [PATCH 11/25] io-controller: Export disk time used and nr sectors dipatched through cgroups Vivek Goyal
2009-07-02 20:01   ` Vivek Goyal
2009-07-08  2:16   ` Gui Jianfeng
2009-07-08  2:16     ` Gui Jianfeng
2009-07-08 14:00     ` Vivek Goyal
2009-07-08 14:00       ` Vivek Goyal
     [not found]     ` <4A54018C.5090804-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2009-07-08 14:00       ` Vivek Goyal
     [not found]   ` <1246564917-19603-12-git-send-email-vgoyal-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2009-07-08  2:16     ` Gui Jianfeng
2009-07-02 20:01 ` [PATCH 12/25] io-controller: idle for sometime on sync queue before expiring it Vivek Goyal
2009-07-02 20:01   ` Vivek Goyal
2009-07-02 20:01 ` [PATCH 13/25] io-controller: Wait for requests to complete from last queue before new queue is scheduled Vivek Goyal
2009-07-02 20:09   ` Nauman Rafique
2009-07-02 20:17     ` Vivek Goyal
2009-07-02 20:01 ` [PATCH 14/25] io-controller: Separate out queue and data Vivek Goyal
2009-07-02 20:01 ` [PATCH 15/25] io-conroller: Prepare elevator layer for single queue schedulers Vivek Goyal
2009-07-02 20:01 ` [PATCH 16/25] io-controller: noop changes for hierarchical fair queuing Vivek Goyal
2009-07-02 20:01 ` [PATCH 17/25] io-controller: deadline " Vivek Goyal
2009-07-02 20:01 ` [PATCH 18/25] io-controller: anticipatory " Vivek Goyal [this message]
2009-07-02 20:01 ` [PATCH 19/25] blkio_cgroup patches from Ryo to track async bios Vivek Goyal
2009-07-02 20:01 ` [PATCH 20/25] io-controller: map async requests to appropriate cgroup Vivek Goyal
2009-08-03  2:13   ` Gui Jianfeng
2009-08-04  1:25     ` Vivek Goyal
2009-07-02 20:01 ` [PATCH 21/25] io-controller: Per cgroup request descriptor support Vivek Goyal
2009-07-08  3:27   ` Gui Jianfeng
2009-07-08 13:57     ` Vivek Goyal
2009-07-21  5:37   ` Gui Jianfeng
2009-07-21  5:55     ` Nauman Rafique
2009-07-21 14:01       ` Vivek Goyal
2009-07-21 17:57         ` Nauman Rafique
2009-07-02 20:01 ` [PATCH 22/25] io-controller: Per io group bdi congestion interface Vivek Goyal
2009-07-17  0:16   ` Munehiro Ikeda
2009-07-17 13:52     ` Vivek Goyal
2009-07-02 20:01 ` [PATCH 23/25] io-controller: Support per cgroup per device weights and io class Vivek Goyal
2009-07-02 20:01 ` [PATCH 24/25] io-controller: Debug hierarchical IO scheduling Vivek Goyal
2009-07-02 20:01 ` [PATCH 25/25] io-controller: experimental debug patch for async queue wait before expiry Vivek Goyal
2009-07-08  3:56 ` [RFC] IO scheduler based IO controller V6 Balbir Singh
2009-07-08 13:41   ` Vivek Goyal
2009-07-08 14:39     ` Balbir Singh
2009-07-09  1:58       ` Vivek Goyal
2009-07-10  1:56 ` [PATCH] io-controller: implement per group request allocation limitation Gui Jianfeng
2009-07-13 16:03   ` Vivek Goyal
2009-07-13 21:08     ` Munehiro Ikeda
2009-07-14  7:45       ` Gui Jianfeng
2009-08-04  2:00         ` Munehiro Ikeda
2009-08-04  6:38           ` Gui Jianfeng
2009-08-04 22:37           ` Vivek Goyal
2009-07-14  7:37       ` Gui Jianfeng
2009-08-04  2:02   ` Munehiro Ikeda
2009-08-04  6:41     ` Gui Jianfeng
2009-08-04  2:04   ` Munehiro Ikeda
2009-08-04  6:45     ` Gui Jianfeng
2009-07-27  2:10 ` [RFC] IO scheduler based IO controller V6 Gui Jianfeng
2009-07-27 12:55   ` Vivek Goyal
2009-07-28  3:27     ` Vivek Goyal
2009-07-28  3:36       ` Gui Jianfeng
2009-07-28 11:36     ` Gui Jianfeng
2009-07-29  9:07     ` Gui Jianfeng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save this message's mbox file (the "mbox" link on the archive page),
  import it into your mail client, and reply-to-all from there; one
  possible command-line route is sketched below.

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
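
  For example, assuming the usual lore.kernel.org URL layout (appending
  t.mbox.gz to a message URL returns the whole thread as a gzipped mbox;
  the "all" inbox path below is an assumption, any list that archived
  this thread should also work), the thread could be fetched and opened
  from the command line roughly like this:

    # fetch the full thread as a gzipped mbox (URL scheme assumed, see above)
    curl -o thread.mbox.gz \
      'https://lore.kernel.org/all/1246564917-19603-19-git-send-email-vgoyal@redhat.com/t.mbox.gz'

    # unpack it and open it in any mbox-capable client, e.g. mutt
    gunzip thread.mbox.gz
    mutt -f thread.mbox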

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1246564917-19603-19-git-send-email-vgoyal@redhat.com \
    --to=vgoyal@redhat.com \
    --cc=agk@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=containers@lists.linux-foundation.org \
    --cc=dhaval@linux.vnet.ibm.com \
    --cc=dm-devel@redhat.com \
    --cc=dpshah@google.com \
    --cc=fchecconi@gmail.com \
    --cc=fernando@oss.ntt.co.jp \
    --cc=guijianfeng@cn.fujitsu.com \
    --cc=jbaron@redhat.com \
    --cc=jens.axboe@oracle.com \
    --cc=jmoyer@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizf@cn.fujitsu.com \
    --cc=m-ikeda@ds.jp.nec.com \
    --cc=mikew@google.com \
    --cc=nauman@google.com \
    --cc=paolo.valente@unimore.it \
    --cc=peterz@infradead.org \
    --cc=righi.andrea@gmail.com \
    --cc=ryov@valinux.co.jp \
    --cc=s-uchida@ap.jp.nec.com \
    --cc=snitzer@redhat.com \
    --cc=taka@valinux.co.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link on the archive page.
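
  For reference, such a link typically encodes the recipient address and
  the In-Reply-To header in a mailto: URI along these lines (header-field
  syntax per RFC 6068; whether a client honors extra header fields varies):

    mailto:vgoyal@redhat.com?In-Reply-To=%3C1246564917-19603-19-git-send-email-vgoyal@redhat.com%3E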

Be sure your reply has a Subject: header at the top and a blank line
before the message body.