linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] [PATCH 3/8] cfq-iosched: Introduce vdisktime and io weight for CFQ queue
@ 2010-11-15  0:53 Gui Jianfeng
  0 siblings, 0 replies; 2+ messages in thread
From: Gui Jianfeng @ 2010-11-15  0:53 UTC (permalink / raw)
  To: Vivek Goyal, Jens Axboe
  Cc: Corrado Zoccolo, Chad Talbott, Nauman Rafique, Divyesh Shah,
	linux kernel mailing list, Gui Jianfeng

Introduce vdisktime and io weight for CFQ queue scheduling. Currently, io priority
maps to a weight in the range [100,1000]. It also gets rid of cfq_slice_offset() logic
and makes CFQ queues use the same scheduling algorithm as CFQ groups do. This helps
with scheduling CFQ queues and groups on the same service tree.

Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
---
 block/cfq-iosched.c |  194 ++++++++++++++++++++++++++++++++++----------------
 1 files changed, 132 insertions(+), 62 deletions(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 5cce1e8..ef88931 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -102,10 +102,7 @@ struct io_sched_entity {
 	struct cfq_rb_root *service_tree;
 	/* service_tree member */
 	struct rb_node rb_node;
-	/* service_tree key, represent the position on the tree */
-	unsigned long rb_key;
-
-	/* group service_tree key */
+	/* service_tree key */
 	u64 vdisktime;
 	bool on_st;
 	bool is_group_entity;
@@ -118,6 +115,8 @@ struct io_sched_entity {
 struct cfq_queue {
 	/* The schedule entity */
 	struct io_sched_entity queue_entity;
+	/* Reposition time */
+	unsigned long reposition_time;
 	/* reference count */
 	atomic_t ref;
 	/* various state flags, see below */
@@ -306,6 +305,22 @@ struct cfq_data {
 	struct rcu_head rcu;
 };
 
+/*
+ * Map io priority(7 ~ 0) to io weight(100 ~ 1000)
+ */
+static inline unsigned int cfq_prio_to_weight(unsigned short ioprio)
+{
+	unsigned int step;
+
+	BUG_ON(ioprio >= IOPRIO_BE_NR);
+
+	step = (BLKIO_WEIGHT_MAX - BLKIO_WEIGHT_MIN) / (IOPRIO_BE_NR - 1);
+	if (ioprio == 0)
+		return BLKIO_WEIGHT_MAX;
+
+	return BLKIO_WEIGHT_MIN + (IOPRIO_BE_NR - ioprio - 1) * step;
+}
+
 static inline struct cfq_queue *
 cfqq_of_entity(struct io_sched_entity *io_entity)
 {
@@ -551,12 +566,13 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
 }
 
-static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg)
+static inline u64
+cfq_scale_slice(unsigned long delta, struct io_sched_entity *entity)
 {
 	u64 d = delta << CFQ_SERVICE_SHIFT;
 
 	d = d * BLKIO_WEIGHT_DEFAULT;
-	do_div(d, cfqg->group_entity.weight);
+	do_div(d, entity->weight);
 	return d;
 }
 
@@ -581,16 +597,16 @@ static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
 static void update_min_vdisktime(struct cfq_rb_root *st)
 {
 	u64 vdisktime = st->min_vdisktime;
-	struct io_sched_entity *group_entity;
+	struct io_sched_entity *entity;
 
 	if (st->active) {
-		group_entity = rb_entry_entity(st->active);
-		vdisktime = group_entity->vdisktime;
+		entity = rb_entry_entity(st->active);
+		vdisktime = entity->vdisktime;
 	}
 
 	if (st->left) {
-		group_entity = rb_entry_entity(st->left);
-		vdisktime = min_vdisktime(vdisktime, group_entity->vdisktime);
+		entity = rb_entry_entity(st->left);
+		vdisktime = min_vdisktime(vdisktime, entity->vdisktime);
 	}
 
 	st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime);
@@ -838,16 +854,6 @@ cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last));
 }
 
-static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
-				      struct cfq_queue *cfqq)
-{
-	/*
-	 * just an approximation, should be ok.
-	 */
-	return (cfqq->cfqg->nr_cfqq - 1) * (cfq_prio_slice(cfqd, 1, 0) -
-		       cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
-}
-
 static inline s64
 entity_key(struct cfq_rb_root *st, struct io_sched_entity *entity)
 {
@@ -983,7 +989,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 
 	/* Can't update vdisktime while group is on service tree */
 	cfq_rb_erase(&group_entity->rb_node, st);
-	group_entity->vdisktime += cfq_scale_slice(charge, cfqg);
+	group_entity->vdisktime += cfq_scale_slice(charge, group_entity);
 	__cfq_group_service_tree_add(st, cfqg);
 
 	/* This group is being expired. Save the context */
@@ -1214,13 +1220,14 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	struct io_sched_entity *queue_entity;
 	struct rb_node **p, *parent;
 	struct io_sched_entity *__queue_entity;
-	unsigned long rb_key;
-	struct cfq_rb_root *service_tree;
+	struct cfq_rb_root *service_tree, *orig_st;
 	int left;
 	int new_cfqq = 1;
 	int group_changed = 0;
+	s64 key;
 
 	queue_entity = &cfqq->queue_entity;
+	orig_st = queue_entity->service_tree;
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 	if (!cfqd->cfq_group_isolation
@@ -1228,8 +1235,16 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	    && cfqq->cfqg && cfqq->cfqg != &cfqd->root_group) {
 		/* Move this cfq to root group */
 		cfq_log_cfqq(cfqd, cfqq, "moving to root group");
-		if (!RB_EMPTY_NODE(&queue_entity->rb_node))
+		if (!RB_EMPTY_NODE(&queue_entity->rb_node)) {
 			cfq_group_service_tree_del(cfqd, cfqq->cfqg);
+			/*
+			 * Group changed, dequeue this CFQ queue from the
+			 * original service tree.
+			 */
+			cfq_rb_erase(&queue_entity->rb_node,
+				     queue_entity->service_tree);
+			orig_st->total_weight -= queue_entity->weight;
+		}
 		cfqq->orig_cfqg = cfqq->cfqg;
 		cfqq->cfqg = &cfqd->root_group;
 		atomic_inc(&cfqd->root_group.ref);
@@ -1238,8 +1253,16 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		   && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
 		/* cfqq is sequential now needs to go to its original group */
 		BUG_ON(cfqq->cfqg != &cfqd->root_group);
-		if (!RB_EMPTY_NODE(&queue_entity->rb_node))
+		if (!RB_EMPTY_NODE(&queue_entity->rb_node)) {
 			cfq_group_service_tree_del(cfqd, cfqq->cfqg);
+			/*
+			 * Group changed, dequeue this CFQ queue from the
+			 * original service tree.
+			 */
+			cfq_rb_erase(&queue_entity->rb_node,
+				     queue_entity->service_tree);
+			orig_st->total_weight -= queue_entity->weight;
+		}
 		cfq_put_cfqg(cfqq->cfqg);
 		cfqq->cfqg = cfqq->orig_cfqg;
 		cfqq->orig_cfqg = NULL;
@@ -1250,50 +1273,67 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 
 	service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
 						cfqq_type(cfqq));
+
+	/*
+	 * For the time being, put the newly added CFQ queue at the end of the
+	 * service tree.
+	 */
+	if (RB_EMPTY_NODE(&queue_entity->rb_node)) {
+		/*
+		 * If this CFQ queue moves to another group, the original
+		 * vdisktime makes no sense any more, reset the vdisktime
+		 * here.
+		 */
+		parent = rb_last(&service_tree->rb);
+		if (parent) {
+			__queue_entity = rb_entry_entity(parent);
+			queue_entity->vdisktime = __queue_entity->vdisktime +
+						  CFQ_IDLE_DELAY;
+		} else
+			queue_entity->vdisktime = service_tree->min_vdisktime;
+
+		goto insert;
+	}
+
+	/*
+	 * Ok, we get here, this CFQ queue is on the service tree, dequeue it
+	 * firstly.
+	 */
+	cfq_rb_erase(&queue_entity->rb_node,
+		     queue_entity->service_tree);
+	orig_st->total_weight -= queue_entity->weight;
+
+	new_cfqq = 0;
 	if (cfq_class_idle(cfqq)) {
-		rb_key = CFQ_IDLE_DELAY;
 		parent = rb_last(&service_tree->rb);
 		if (parent && parent != &queue_entity->rb_node) {
 			__queue_entity = rb_entry(parent,
 						  struct io_sched_entity,
 						  rb_node);
-			rb_key += __queue_entity->rb_key;
+			queue_entity->vdisktime = __queue_entity->vdisktime +
+						  CFQ_IDLE_DELAY;
 		} else
-			rb_key += jiffies;
+			queue_entity->vdisktime = service_tree->min_vdisktime;
 	} else if (!add_front) {
 		/*
-		 * Get our rb key offset. Subtract any residual slice
-		 * value carried from last service. A negative resid
-		 * count indicates slice overrun, and this should position
-		 * the next service time further away in the tree.
-		 */
-		rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
-		rb_key -= cfqq->slice_resid;
-		cfqq->slice_resid = 0;
-	} else {
-		rb_key = -HZ;
-		__queue_entity = cfq_rb_first(service_tree);
-		rb_key += __queue_entity ? __queue_entity->rb_key : jiffies;
-	}
-
-	if (!RB_EMPTY_NODE(&queue_entity->rb_node)) {
-		new_cfqq = 0;
-		/*
-		 * same position, nothing more to do
+		 * We charge the CFQ queue by the time this queue runs, and
+		 * repsition it on the service tree.
 		 */
-		if (rb_key == queue_entity->rb_key &&
-		    queue_entity->service_tree == service_tree)
-			return;
+		unsigned int used_sl;
 
-		cfq_rb_erase(&queue_entity->rb_node,
-			     queue_entity->service_tree);
-		queue_entity->service_tree = NULL;
+		used_sl = cfq_cfqq_slice_usage(cfqq);
+		queue_entity->vdisktime += cfq_scale_slice(used_sl,
+							   queue_entity);
+	} else {
+		queue_entity->vdisktime = service_tree->min_vdisktime;
 	}
 
+insert:
 	left = 1;
 	parent = NULL;
 	queue_entity->service_tree = service_tree;
 	p = &service_tree->rb.rb_node;
+	key = entity_key(service_tree, queue_entity);
 	while (*p) {
 		struct rb_node **n;
 
@@ -1304,7 +1344,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		/*
 		 * sort by key, that represents service time.
 		 */
-		if (time_before(rb_key, __queue_entity->rb_key))
+		if (key < entity_key(service_tree, __queue_entity))
 			n = &(*p)->rb_left;
 		else {
 			n = &(*p)->rb_right;
@@ -1317,10 +1357,12 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	if (left)
 		service_tree->left = &queue_entity->rb_node;
 
-	queue_entity->rb_key = rb_key;
 	rb_link_node(&queue_entity->rb_node, parent, p);
 	rb_insert_color(&queue_entity->rb_node, &service_tree->rb);
+	update_min_vdisktime(service_tree);
 	service_tree->count++;
+	service_tree->total_weight += queue_entity->weight;
+	cfqq->reposition_time = jiffies;
 	if ((add_front || !new_cfqq) && !group_changed)
 		return;
 	cfq_group_service_tree_add(cfqd, cfqq->cfqg);
@@ -1422,15 +1464,19 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	struct io_sched_entity *queue_entity;
+	struct cfq_rb_root *service_tree;
+
 	cfq_log_cfqq(cfqd, cfqq, "del_from_rr");
 	BUG_ON(!cfq_cfqq_on_rr(cfqq));
 	cfq_clear_cfqq_on_rr(cfqq);
 
 	queue_entity = &cfqq->queue_entity;
+	service_tree = queue_entity->service_tree;
 
 	if (!RB_EMPTY_NODE(&queue_entity->rb_node)) {
 		cfq_rb_erase(&queue_entity->rb_node,
 			     queue_entity->service_tree);
+		service_tree->total_weight -= queue_entity->weight;
 		queue_entity->service_tree = NULL;
 	}
 	if (cfqq->p_root) {
@@ -2132,24 +2178,35 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
 	}
 }
 
+/*
+ * The time when a CFQ queue is put onto a service tree is recoreded in
+ * cfqq->reposition_time. Currently, we check the first priority CFQ queues
+ * on each service tree, and select the workload type that contain the lowest
+ * reposition_time CFQ queue among them.
+ */
 static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
 				struct cfq_group *cfqg, enum wl_prio_t prio)
 {
 	struct io_sched_entity *queue_entity;
+	struct cfq_queue *cfqq;
+	unsigned long lowest_start_time;
 	int i;
-	bool key_valid = false;
-	unsigned long lowest_key = 0;
+	bool time_valid = false;
 	enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
 
+	/*
+	 * TODO: We may take io priority into account when choosing a workload
+	 * type. But for the time being just make use of reposition_time only.
+	 */
 	for (i = 0; i <= SYNC_WORKLOAD; ++i) {
-		/* select the one with lowest rb_key */
 		queue_entity = cfq_rb_first(service_tree_for(cfqg, prio, i));
+		cfqq = cfqq_of_entity(queue_entity);
 		if (queue_entity &&
-		    (!key_valid ||
-		     time_before(queue_entity->rb_key, lowest_key))) {
-			lowest_key = queue_entity->rb_key;
+		    (!time_valid ||
+		     cfqq->reposition_time < lowest_start_time)) {
+			lowest_start_time = cfqq->reposition_time;
 			cur_best = i;
-			key_valid = true;
+			time_valid = true;
 		}
 	}
 
@@ -2811,10 +2868,13 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 {
 	struct task_struct *tsk = current;
 	int ioprio_class;
+	struct io_sched_entity *queue_entity;
 
 	if (!cfq_cfqq_prio_changed(cfqq))
 		return;
 
+	queue_entity = &cfqq->queue_entity;
+
 	ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio);
 	switch (ioprio_class) {
 	default:
@@ -2841,6 +2901,8 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 		break;
 	}
 
+	queue_entity->weight = cfq_prio_to_weight(cfqq->ioprio);
+
 	/*
 	 * keep track of original prio settings in case we have to temporarily
 	 * elevate the priority of this queue
@@ -3571,6 +3633,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
  */
 static void cfq_prio_boost(struct cfq_queue *cfqq)
 {
+	struct io_sched_entity *queue_entity;
+
+	queue_entity = &cfqq->queue_entity;
 	if (has_fs_excl()) {
 		/*
 		 * boost idle prio on transactions that would lock out other
@@ -3587,6 +3652,11 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
 		cfqq->ioprio_class = cfqq->org_ioprio_class;
 		cfqq->ioprio = cfqq->org_ioprio;
 	}
+
+	/*
+	 * update the io weight if io priority gets changed.
+	 */
+	queue_entity->weight = cfq_prio_to_weight(cfqq->ioprio);
 }
 
 static inline int __cfq_may_queue(struct cfq_queue *cfqq)
-- 
1.6.5.2






-- 
Regards
Gui Jianfeng

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [RFC] [PATCH 3/8] cfq-iosched: Introduce vdisktime and io weight for CFQ queue
       [not found]   ` <20101115193352.GB3396@redhat.com>
@ 2010-11-29  2:32     ` Gui Jianfeng
  0 siblings, 0 replies; 2+ messages in thread
From: Gui Jianfeng @ 2010-11-29  2:32 UTC (permalink / raw)
  To: Vivek Goyal
  Cc: Jens Axboe, Corrado Zoccolo, Chad Talbott, Nauman Rafique,
	Divyesh Shah, linux kernel mailing list

Vivek Goyal wrote:
> On Sun, Nov 14, 2010 at 04:24:56PM +0800, Gui Jianfeng wrote:
>> Introduce vdisktime and io weight for CFQ queue scheduling. Currently, io priority
>> maps to a range [100,1000]. It also gets rid of cfq_slice_offset() logic and makes
>> use the same scheduling algorithm as CFQ group does. This helps for CFQ queue and
>> group scheduling on the same service tree.
>>
> 
> Gui,
> 
> I think we can't get rid of cfq_slice_offset() logic altogether because 
> I believe this piece can help provide some service differentiation between
> queues on SSDs or when idling is not enabled. Though that service
> differentiation is highly unpredictable and becomes even less visible
> when NCQ is enabled.
> 
> So we shall have to replace it with some similar logic. When a new queue
> entity gets backlogged on service tree, give it some jump in vdisktime
> based on ioprio. Lower ioprio gets higher vdisktime jump etc.

Vivek,

Ok, I'll consider your suggestion.

> 
> To test this, I would say take an SSD, set the queue depth to 1, and
> then run a bunch of threads with different ioprio. First see whether you
> get any service differentiation without the patch, and then run it again
> with your patch applied for comparison.

Ok

> 
> 
>> Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
>> ---
>>  block/cfq-iosched.c |  194 ++++++++++++++++++++++++++++++++++----------------
>>  1 files changed, 132 insertions(+), 62 deletions(-)
>>
>> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> index 5cce1e8..ef88931 100644
>> --- a/block/cfq-iosched.c
>> +++ b/block/cfq-iosched.c
>> @@ -102,10 +102,7 @@ struct io_sched_entity {
>>  	struct cfq_rb_root *service_tree;
>>  	/* service_tree member */
>>  	struct rb_node rb_node;
>> -	/* service_tree key, represent the position on the tree */
>> -	unsigned long rb_key;
>> -
>> -	/* group service_tree key */
>> +	/* service_tree key */
>>  	u64 vdisktime;
>>  	bool on_st;
>>  	bool is_group_entity;
>> @@ -118,6 +115,8 @@ struct io_sched_entity {
>>  struct cfq_queue {
>>  	/* The schedule entity */
>>  	struct io_sched_entity queue_entity;
>> +	/* Reposition time */
>> +	unsigned long reposition_time;
>>  	/* reference count */
>>  	atomic_t ref;
>>  	/* various state flags, see below */
>> @@ -306,6 +305,22 @@ struct cfq_data {
>>  	struct rcu_head rcu;
>>  };
>>  
>> +/*
>> + * Map io priority(7 ~ 0) to io weight(100 ~ 1000)
>> + */
>> +static inline unsigned int cfq_prio_to_weight(unsigned short ioprio)
>> +{
>> +	unsigned int step;
>> +
>> +	BUG_ON(ioprio >= IOPRIO_BE_NR);
>> +
>> +	step = (BLKIO_WEIGHT_MAX - BLKIO_WEIGHT_MIN) / (IOPRIO_BE_NR - 1);
>> +	if (ioprio == 0)
>> +		return BLKIO_WEIGHT_MAX;
>> +
>> +	return BLKIO_WEIGHT_MIN + (IOPRIO_BE_NR - ioprio - 1) * step;
>> +}
>> +
> 
> What's the rationale behind the above formula? How does it map prio to
> weights?

This formula maps ioprio to weight as follows:

prio	0     1     2    3    4    5    6     7
weight  1000  868   740  612  484  356  228   100 (new prio to weight mapping)

> 
> Could we just do the following?
> 
> 	step = BLKIO_WEIGHT_MAX/IOPRIO_BE_NR
> 
> 	return BLKIO_WEIGHT_MAX - (ioprio * step) 
> 
> above should map prio to weights as follows.
> 
> slice  180   160   140  120  100  80   60    40 (old prio to slice mapping)
> prio	0     1     2    3    4    5    6     7
> weight  1000  875   750  625  500  375 250   125 (new prio to weight mapping)
> 
>>  static inline struct cfq_queue *
>>  cfqq_of_entity(struct io_sched_entity *io_entity)
>>  {
>> @@ -551,12 +566,13 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
>>  	return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
>>  }
>>  
>> -static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg)
>> +static inline u64
>> +cfq_scale_slice(unsigned long delta, struct io_sched_entity *entity)
>>  {
>>  	u64 d = delta << CFQ_SERVICE_SHIFT;
>>  
>>  	d = d * BLKIO_WEIGHT_DEFAULT;
>> -	do_div(d, cfqg->group_entity.weight);
>> +	do_div(d, entity->weight);
> 
> This can go in previous patch?

Sure.

> 
>>  	return d;
> 
>>  }
>>  
>> @@ -581,16 +597,16 @@ static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
>>  static void update_min_vdisktime(struct cfq_rb_root *st)
>>  {
>>  	u64 vdisktime = st->min_vdisktime;
>> -	struct io_sched_entity *group_entity;
>> +	struct io_sched_entity *entity;
>>  
>>  	if (st->active) {
>> -		group_entity = rb_entry_entity(st->active);
>> -		vdisktime = group_entity->vdisktime;
>> +		entity = rb_entry_entity(st->active);
>> +		vdisktime = entity->vdisktime;
>>  	}
>>  
>>  	if (st->left) {
>> -		group_entity = rb_entry_entity(st->left);
>> -		vdisktime = min_vdisktime(vdisktime, group_entity->vdisktime);
>> +		entity = rb_entry_entity(st->left);
>> +		vdisktime = min_vdisktime(vdisktime, entity->vdisktime);
>>  	}
>>  
> 
> Can go in previous patch?

Sure.

> 
>>  	st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime);
>> @@ -838,16 +854,6 @@ cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
>>  	return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last));
>>  }
>>  
>> -static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
>> -				      struct cfq_queue *cfqq)
>> -{
>> -	/*
>> -	 * just an approximation, should be ok.
>> -	 */
>> -	return (cfqq->cfqg->nr_cfqq - 1) * (cfq_prio_slice(cfqd, 1, 0) -
>> -		       cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
>> -}
>> -
>>  static inline s64
>>  entity_key(struct cfq_rb_root *st, struct io_sched_entity *entity)
>>  {
>> @@ -983,7 +989,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
>>  
>>  	/* Can't update vdisktime while group is on service tree */
>>  	cfq_rb_erase(&group_entity->rb_node, st);
>> -	group_entity->vdisktime += cfq_scale_slice(charge, cfqg);
>> +	group_entity->vdisktime += cfq_scale_slice(charge, group_entity);
> 
> can go in previous patch?

Sure.

> 
> [..]
>>  
>> +/*
>> + * The time when a CFQ queue is put onto a service tree is recoreded in
>> + * cfqq->reposition_time. Currently, we check the first priority CFQ queues
>> + * on each service tree, and select the workload type that contain the lowest
>> + * reposition_time CFQ queue among them.
>> + */
> 
> What is the rationale behind reposition_time? Can you explain a bit more
> why we need it? I can't figure it out yet.

In the original CFQ, rb_key is comparable across trees: we select the lowest
rb_key among the three workload trees. Once vdisktime is introduced, each tree
maintains its own vdisktime, but we still want to choose the first-priority queue
across trees. So for the time being, I record the reposition_time for each cfqq,
and pick the workload whose first queue has the smallest reposition_time when
selecting a new workload.

Gui

> 
> Vivek
> 
>>  static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
>>  				struct cfq_group *cfqg, enum wl_prio_t prio)
>>  {
>>  	struct io_sched_entity *queue_entity;
>> +	struct cfq_queue *cfqq;
>> +	unsigned long lowest_start_time;
>>  	int i;
>> -	bool key_valid = false;
>> -	unsigned long lowest_key = 0;
>> +	bool time_valid = false;
>>  	enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
>>  
>> +	/*
>> +	 * TODO: We may take io priority into account when choosing a workload
>> +	 * type. But for the time being just make use of reposition_time only.
>> +	 */
>>  	for (i = 0; i <= SYNC_WORKLOAD; ++i) {
>> -		/* select the one with lowest rb_key */
>>  		queue_entity = cfq_rb_first(service_tree_for(cfqg, prio, i));
>> +		cfqq = cfqq_of_entity(queue_entity);
>>  		if (queue_entity &&
>> -		    (!key_valid ||
>> -		     time_before(queue_entity->rb_key, lowest_key))) {
>> -			lowest_key = queue_entity->rb_key;
>> +		    (!time_valid ||
>> +		     cfqq->reposition_time < lowest_start_time)) {
>> +			lowest_start_time = cfqq->reposition_time;
>>  			cur_best = i;
>> -			key_valid = true;
>> +			time_valid = true;
>>  		}
> 

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2010-11-29  2:33 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-11-15  0:53 [RFC] [PATCH 3/8] cfq-iosched: Introduce vdisktime and io weight for CFQ queue Gui Jianfeng
     [not found] <4CDF7BC5.9080803@cn.fujitsu.com>
     [not found] ` <4CDF9CD8.8010207@cn.fujitsu.com>
     [not found]   ` <20101115193352.GB3396@redhat.com>
2010-11-29  2:32     ` Gui Jianfeng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).