All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: torvalds@linux-foundation.org, peterz@infradead.org,
	tglx@linutronix.de, linux-pm@vger.kernel.org,
	Tejun Heo <tj@kernel.org>
Subject: [PATCH 7/9] workqueue: don't butcher idle workers on an offline CPU
Date: Tue, 17 Jul 2012 10:12:27 -0700	[thread overview]
Message-ID: <1342545149-3515-8-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1342545149-3515-1-git-send-email-tj@kernel.org>

Currently, during CPU offlining, after all pending work items are
drained, the trustee butchers all workers.  Also, on CPU onlining
failure, workqueue_cpu_callback() ensures that the first idle worker
is destroyed.  Combined, these guarantee that an offline CPU doesn't
have any worker for it once all the lingering work items are finished.

This guarantee isn't really necessary and makes CPU on/offlining more
expensive than needs to be, especially for platforms which use CPU
hotplug for powersaving.

This patch lets offline CPUs removes idle worker butchering from the
trustee and let a CPU which failed onlining keep the created first
worker.  The first worker is created if the CPU doesn't have any
during CPU_DOWN_PREPARE and started right away.  If onlining succeeds,
the rebind_workers() call in CPU_ONLINE will rebind it like any other
workers.  If onlining fails, the worker is left alone till the next
try.

This makes CPU hotplugs cheaper by allowing global_cwqs to keep
workers across them and simplifies code.

Note that trustee doesn't re-arm idle timer when it's done and thus
the disassociated global_cwq will keep all workers until it comes back
online.  This will be improved by further patches.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c |   94 ++++++++--------------------------------------------
 1 files changed, 14 insertions(+), 80 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6927fec..acfabb2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -175,7 +175,6 @@ struct worker_pool {
 
 	struct mutex		manager_mutex;	/* mutex manager should hold */
 	struct ida		worker_ida;	/* L: for worker IDs */
-	struct worker		*first_idle;	/* L: first idle worker */
 };
 
 /*
@@ -3477,16 +3476,6 @@ static void gcwq_release_management(struct global_cwq *gcwq)
 	__ret1 < 0 ? -1 : 0;						\
 })
 
-static bool gcwq_has_idle_workers(struct global_cwq *gcwq)
-{
-	struct worker_pool *pool;
-
-	for_each_worker_pool(pool, gcwq)
-		if (!list_empty(&pool->idle_list))
-			return true;
-	return false;
-}
-
 static int __cpuinit trustee_thread(void *__gcwq)
 {
 	struct global_cwq *gcwq = __gcwq;
@@ -3494,7 +3483,6 @@ static int __cpuinit trustee_thread(void *__gcwq)
 	struct worker *worker;
 	struct work_struct *work;
 	struct hlist_node *pos;
-	long rc;
 	int i;
 
 	BUG_ON(gcwq->cpu != smp_processor_id());
@@ -3597,25 +3585,6 @@ static int __cpuinit trustee_thread(void *__gcwq)
 			break;
 	}
 
-	/*
-	 * Either all works have been scheduled and cpu is down, or
-	 * cpu down has already been canceled.  Wait for and butcher
-	 * all workers till we're canceled.
-	 */
-	do {
-		rc = trustee_wait_event(gcwq_has_idle_workers(gcwq));
-
-		i = 0;
-		for_each_worker_pool(pool, gcwq) {
-			while (!list_empty(&pool->idle_list)) {
-				worker = list_first_entry(&pool->idle_list,
-							  struct worker, entry);
-				destroy_worker(worker);
-			}
-			i |= pool->nr_workers;
-		}
-	} while (i && rc >= 0);
-
 	gcwq_release_management(gcwq);
 
 	/* notify completion */
@@ -3658,10 +3627,8 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 	unsigned int cpu = (unsigned long)hcpu;
 	struct global_cwq *gcwq = get_gcwq(cpu);
 	struct task_struct *new_trustee = NULL;
-	struct worker *new_workers[NR_WORKER_POOLS] = { };
 	struct worker_pool *pool;
 	unsigned long flags;
-	int i;
 
 	action &= ~CPU_TASKS_FROZEN;
 
@@ -3672,14 +3639,22 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 		if (IS_ERR(new_trustee))
 			return notifier_from_errno(PTR_ERR(new_trustee));
 		kthread_bind(new_trustee, cpu);
-		/* fall through */
+		break;
+
 	case CPU_UP_PREPARE:
-		i = 0;
 		for_each_worker_pool(pool, gcwq) {
-			BUG_ON(pool->first_idle);
-			new_workers[i] = create_worker(pool);
-			if (!new_workers[i++])
-				goto err_destroy;
+			struct worker *worker;
+
+			if (pool->nr_workers)
+				continue;
+
+			worker = create_worker(pool);
+			if (!worker)
+				return NOTIFY_BAD;
+
+			spin_lock_irq(&gcwq->lock);
+			start_worker(worker);
+			spin_unlock_irq(&gcwq->lock);
 		}
 	}
 
@@ -3694,23 +3669,10 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 		gcwq->trustee_state = TRUSTEE_START;
 		wake_up_process(gcwq->trustee);
 		wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE);
-		/* fall through */
-	case CPU_UP_PREPARE:
-		i = 0;
-		for_each_worker_pool(pool, gcwq) {
-			BUG_ON(pool->first_idle);
-			pool->first_idle = new_workers[i++];
-		}
 		break;
 
 	case CPU_POST_DEAD:
 		gcwq->trustee_state = TRUSTEE_BUTCHER;
-		/* fall through */
-	case CPU_UP_CANCELED:
-		for_each_worker_pool(pool, gcwq) {
-			destroy_worker(pool->first_idle);
-			pool->first_idle = NULL;
-		}
 		break;
 
 	case CPU_DOWN_FAILED:
@@ -3730,39 +3692,12 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 		rebind_workers(gcwq);
 
 		gcwq_release_management(gcwq);
-
-		/*
-		 * Trustee is done and there might be no worker left.
-		 * Put the first_idle in and request a real manager to
-		 * take a look.
-		 */
-		for_each_worker_pool(pool, gcwq) {
-			spin_unlock_irq(&gcwq->lock);
-			kthread_bind(pool->first_idle->task, cpu);
-			spin_lock_irq(&gcwq->lock);
-			pool->flags |= POOL_MANAGE_WORKERS;
-			pool->first_idle->flags &= ~WORKER_UNBOUND;
-			start_worker(pool->first_idle);
-			pool->first_idle = NULL;
-		}
 		break;
 	}
 
 	spin_unlock_irqrestore(&gcwq->lock, flags);
 
 	return notifier_from_errno(0);
-
-err_destroy:
-	if (new_trustee)
-		kthread_stop(new_trustee);
-
-	spin_lock_irqsave(&gcwq->lock, flags);
-	for (i = 0; i < NR_WORKER_POOLS; i++)
-		if (new_workers[i])
-			destroy_worker(new_workers[i]);
-	spin_unlock_irqrestore(&gcwq->lock, flags);
-
-	return NOTIFY_BAD;
 }
 
 /*
@@ -3775,7 +3710,6 @@ static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb,
 {
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_UP_PREPARE:
-	case CPU_UP_CANCELED:
 	case CPU_DOWN_FAILED:
 	case CPU_ONLINE:
 		return workqueue_cpu_callback(nfb, action, hcpu);
-- 
1.7.7.3


  parent reply	other threads:[~2012-07-17 17:12 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-07-17 17:12 [PATCHSET] workqueue: reimplement CPU hotplug to keep idle workers Tejun Heo
2012-07-17 17:12 ` [PATCH 1/9] workqueue: perform cpu down operations from low priority cpu_notifier() Tejun Heo
2012-07-20 21:52   ` Paul E. McKenney
2012-07-20 21:58     ` Tejun Heo
2012-07-21 21:36       ` Paul E. McKenney
2012-07-22 16:43         ` [PATCH] workqueue: fix spurious CPU locality WARN from process_one_work() Tejun Heo
2012-07-22 21:23           ` Paul E. McKenney
2012-07-17 17:12 ` [PATCH 2/9] workqueue: drop CPU_DYING notifier operation Tejun Heo
2012-07-17 17:12 ` [PATCH 3/9] workqueue: ROGUE workers are UNBOUND workers Tejun Heo
2012-07-17 17:12 ` [PATCH 4/9] workqueue: use mutex for global_cwq manager exclusion Tejun Heo
2012-07-17 17:12 ` [PATCH 5/9] workqueue: drop @bind from create_worker() Tejun Heo
2012-07-17 17:12 ` [PATCH 6/9] workqueue: reimplement CPU online rebinding to handle idle workers Tejun Heo
2012-07-17 17:12 ` Tejun Heo [this message]
2012-07-17 17:12 ` [PATCH 8/9] workqueue: remove CPU offline trustee Tejun Heo
2012-07-17 17:12 ` [PATCH 9/9] workqueue: simplify CPU hotplug code Tejun Heo
2012-07-17 18:43 ` [PATCHSET] workqueue: reimplement CPU hotplug to keep idle workers Rafael J. Wysocki
2012-07-17 19:40   ` Tejun Heo
2012-07-20 15:48 ` Peter Zijlstra
2012-07-20 17:02   ` Tejun Heo
2012-07-20 17:21     ` Peter Zijlstra
2012-07-20 17:50       ` Tejun Heo
2012-07-20 18:22         ` Peter Zijlstra
2012-07-20 18:34           ` Tejun Heo
2012-07-20 19:44             ` Rafael J. Wysocki
2012-07-20 19:41               ` Tejun Heo
2012-07-21  6:42               ` Shilimkar, Santosh
2012-07-23  8:38               ` Peter De Schrijver
2012-07-20 16:39 ` Peter Zijlstra
2012-07-20 16:52   ` Tejun Heo
2012-07-20 17:01     ` Peter Zijlstra
2012-07-20 17:08       ` Tejun Heo
2012-07-20 17:19         ` Peter Zijlstra
2012-07-20 17:43           ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1342545149-3515-8-git-send-email-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.