linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: axboe@kernel.dk, vgoyal@redhat.com
Cc: ctalbott@google.com, rni@google.com,
	linux-kernel@vger.kernel.org, Tejun Heo <tj@kernel.org>
Subject: [PATCH 03/16] elevator: clear auxiliary data earlier during elevator switch
Date: Wed,  1 Feb 2012 12:50:16 -0800	[thread overview]
Message-ID: <1328129429-11823-4-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1328129429-11823-1-git-send-email-tj@kernel.org>

Elevator switch tries hard to keep as much context as possible until
the new elevator is ready so that it can revert to the original state
if initializing the new elevator fails for some reason.  Unfortunately,
with more auxiliary contexts to manage, this makes elevator init and
exit paths too complex and fragile.

This patch makes elevator_switch() unregister the current elevator and
flush icq's before starting to initialize the new one.  As we still keep
the old elevator itself, the only difference is that we lose icq's on
the rare occasions of switching failure, which isn't critical at all.

Note that this makes the explicit elevator parameter to
elevator_init_queue() and __elv_register_queue() unnecessary as they
can always use the current elevator.

This patch enables block cgroup cleanups.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
---
 block/elevator.c |   88 +++++++++++++++++++++++++++---------------------------
 1 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/block/elevator.c b/block/elevator.c
index 91e18f8..42543e3 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -168,11 +168,10 @@ static struct elevator_type *elevator_get(const char *name)
 	return e;
 }
 
-static int elevator_init_queue(struct request_queue *q,
-			       struct elevator_queue *eq)
+static int elevator_init_queue(struct request_queue *q)
 {
-	eq->elevator_data = eq->type->ops.elevator_init_fn(q);
-	if (eq->elevator_data)
+	q->elevator->elevator_data = q->elevator->type->ops.elevator_init_fn(q);
+	if (q->elevator->elevator_data)
 		return 0;
 	return -ENOMEM;
 }
@@ -235,7 +234,6 @@ static void elevator_release(struct kobject *kobj)
 int elevator_init(struct request_queue *q, char *name)
 {
 	struct elevator_type *e = NULL;
-	struct elevator_queue *eq;
 	int err;
 
 	if (unlikely(q->elevator))
@@ -269,17 +267,16 @@ int elevator_init(struct request_queue *q, char *name)
 		}
 	}
 
-	eq = elevator_alloc(q, e);
-	if (!eq)
+	q->elevator = elevator_alloc(q, e);
+	if (!q->elevator)
 		return -ENOMEM;
 
-	err = elevator_init_queue(q, eq);
+	err = elevator_init_queue(q);
 	if (err) {
-		kobject_put(&eq->kobj);
+		kobject_put(&q->elevator->kobj);
 		return err;
 	}
 
-	q->elevator = eq;
 	return 0;
 }
 EXPORT_SYMBOL(elevator_init);
@@ -848,8 +845,9 @@ static struct kobj_type elv_ktype = {
 	.release	= elevator_release,
 };
 
-int __elv_register_queue(struct request_queue *q, struct elevator_queue *e)
+int elv_register_queue(struct request_queue *q)
 {
+	struct elevator_queue *e = q->elevator;
 	int error;
 
 	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
@@ -867,11 +865,6 @@ int __elv_register_queue(struct request_queue *q, struct elevator_queue *e)
 	}
 	return error;
 }
-
-int elv_register_queue(struct request_queue *q)
-{
-	return __elv_register_queue(q, q->elevator);
-}
 EXPORT_SYMBOL(elv_register_queue);
 
 void elv_unregister_queue(struct request_queue *q)
@@ -954,39 +947,47 @@ EXPORT_SYMBOL_GPL(elv_unregister);
  */
 static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 {
-	struct elevator_queue *old_elevator, *e;
+	struct elevator_queue *old = q->elevator;
+	bool registered = old->registered;
 	int err;
 
-	/* allocate new elevator */
-	e = elevator_alloc(q, new_e);
-	if (!e)
-		return -ENOMEM;
-
-	err = elevator_init_queue(q, e);
-	if (err) {
-		kobject_put(&e->kobj);
-		return err;
-	}
-
-	/* turn on BYPASS and drain all requests w/ elevator private data */
+	/*
+	 * Turn on BYPASS and drain all requests w/ elevator private data.
+	 * Block layer doesn't call into a quiesced elevator - all requests
+	 * are directly put on the dispatch list without elevator data
+	 * using INSERT_BACK.  All requests have SOFTBARRIER set and no
+	 * merge happens either.
+	 */
 	elv_quiesce_start(q);
 
-	/* unregister old queue, register new one and kill old elevator */
-	if (q->elevator->registered) {
+	/* unregister and clear all auxiliary data of the old elevator */
+	if (registered)
 		elv_unregister_queue(q);
-		err = __elv_register_queue(q, e);
-		if (err)
-			goto fail_register;
-	}
 
-	/* done, clear io_cq's, switch elevators and turn off BYPASS */
 	spin_lock_irq(q->queue_lock);
 	ioc_clear_queue(q);
-	old_elevator = q->elevator;
-	q->elevator = e;
 	spin_unlock_irq(q->queue_lock);
 
-	elevator_exit(old_elevator);
+	/* allocate, init and register new elevator */
+	err = -ENOMEM;
+	q->elevator = elevator_alloc(q, new_e);
+	if (!q->elevator)
+		goto fail_init;
+
+	err = elevator_init_queue(q);
+	if (err) {
+		kobject_put(&q->elevator->kobj);
+		goto fail_init;
+	}
+
+	if (registered) {
+		err = elv_register_queue(q);
+		if (err)
+			goto fail_register;
+	}
+
+	/* done, kill the old one and finish */
+	elevator_exit(old);
 	elv_quiesce_end(q);
 
 	blk_add_trace_msg(q, "elv switch: %s", e->type->elevator_name);
@@ -994,11 +995,10 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	return 0;
 
 fail_register:
-	/*
-	 * switch failed, exit the new io scheduler and reattach the old
-	 * one again (along with re-adding the sysfs dir)
-	 */
-	elevator_exit(e);
+	elevator_exit(q->elevator);
+fail_init:
+	/* switch failed, restore and re-register old elevator */
+	q->elevator = old;
 	elv_register_queue(q);
 	elv_quiesce_end(q);
 
-- 
1.7.7.3


  parent reply	other threads:[~2012-02-01 20:53 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-02-01 20:50 [PATCHSET] blkcg: kill policy node and blkg->dev, take#4 Tejun Heo
2012-02-01 20:50 ` [PATCH 01/16] blkcg: make CONFIG_BLK_CGROUP bool Tejun Heo
2012-02-02  6:53   ` Li Zefan
2012-02-02 17:14     ` Tejun Heo
2012-02-01 20:50 ` [PATCH 02/16] cfq: don't register propio policy if !CONFIG_CFQ_GROUP_IOSCHED Tejun Heo
2012-02-01 20:50 ` Tejun Heo [this message]
2012-02-01 20:50 ` [PATCH 04/16] elevator: make elevator_init_fn() return 0/-errno Tejun Heo
2012-02-01 20:50 ` [PATCH 05/16] block: implement blk_queue_bypass_start/end() Tejun Heo
2012-02-01 20:50 ` [PATCH 06/16] block: extend queue bypassing to cover blkcg policies Tejun Heo
2012-02-01 20:50 ` [PATCH 07/16] blkcg: shoot down blkio_groups on elevator switch Tejun Heo
2012-02-01 20:50 ` [PATCH 08/16] blkcg: move rcu_read_lock() outside of blkio_group get functions Tejun Heo
2012-02-01 20:50 ` [PATCH 09/16] blkcg: update blkg get functions take blkio_cgroup as parameter Tejun Heo
2012-02-01 20:50 ` [PATCH 10/16] blkcg: use q and plid instead of opaque void * for blkio_group association Tejun Heo
2012-02-01 20:50 ` [PATCH 11/16] blkcg: add blkio_policy[] array and allow one policy per policy ID Tejun Heo
2012-02-01 20:50 ` [PATCH 12/16] blkcg: use the usual get blkg path for root blkio_group Tejun Heo
2012-02-01 20:50 ` [PATCH 13/16] blkcg: factor out blkio_group creation Tejun Heo
2012-02-01 20:50 ` [PATCH 14/16] blkcg: don't allow or retain configuration of missing devices Tejun Heo
2012-02-01 20:50 ` [PATCH 15/16] blkcg: kill blkio_policy_node Tejun Heo
2012-02-01 20:50 ` [PATCH 16/16] blkcg: kill the mind-bending blkg->dev Tejun Heo
  -- strict thread matches above, loose matches on Subject: below --
2012-01-23 23:09 [PATCHSET] blkcg: kill policy node and blkg->dev, take#3 Tejun Heo
2012-01-23 23:09 ` [PATCH 03/16] elevator: clear auxiliary data earlier during elevator switch Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1328129429-11823-4-git-send-email-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=ctalbott@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rni@google.com \
    --cc=vgoyal@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).