* [PATCH] workqueue: clear workers of a pool after the CPU is offline
@ 2013-07-25 10:52 Lai Jiangshan
  2013-07-25 15:31 ` Tejun Heo
From: Lai Jiangshan @ 2013-07-25 10:52 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Lai Jiangshan, linux-kernel

The unbound pools and their workers can be destroyed/cleared
when their refcnt becomes zero. But the per-cpu pools can't be
destroyed because they are always referenced; their refcnt is
always > 0.

We don't want to destroy the per-cpu pools themselves, but we do
want to destroy the workers of a pool once it is fully idle after
its CPU has gone offline. This was the default behavior in the old
days, until we removed the trustee_thread().

We need a new way to restore this behavior, so we add
offline_pool() and a POOL_OFFLINE flag to do so.

1) Before we try to clear the workers, we set POOL_OFFLINE on the pool.
   The pool then stops serving work: any work queued on it is rejected,
   except for chained works.

2) When all pending works have finished and all workers are idle, a worker
   thread schedules offline_pool() to clear the workers.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 kernel/workqueue.c |   89 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f02c4a4..2617895 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -63,13 +63,18 @@ enum {
 	 * %WORKER_UNBOUND set and concurrency management disabled, and may
 	 * be executing on any CPU.  The pool behaves as an unbound one.
 	 *
-	 * Note that DISASSOCIATED should be flipped only while holding
-	 * manager_mutex to avoid changing binding state while
+	 * OFFLINE is a further state beyond DISASSOCIATED, entered once the
+	 * cpu has finished going offline; all the workers will exit after
+	 * they finish the last works of the pool.
+	 *
+	 * Note that DISASSOCIATED and OFFLINE should be flipped only while
+	 * holding manager_mutex to avoid changing binding state while
 	 * create_worker() is in progress.
 	 */
 	POOL_MANAGE_WORKERS	= 1 << 0,	/* need to manage workers */
-	POOL_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */
+	POOL_DISASSOCIATED	= 1 << 2,	/* pool dissociates its cpu */
 	POOL_FREEZING		= 1 << 3,	/* freeze in progress */
+	POOL_OFFLINE		= 1 << 4,	/* pool can't serve work */
 
 	/* worker flags */
 	WORKER_STARTED		= 1 << 0,	/* started */
@@ -164,6 +169,7 @@ struct worker_pool {
 	struct mutex		manager_arb;	/* manager arbitration */
 	struct mutex		manager_mutex;	/* manager exclusion */
 	struct idr		worker_idr;	/* MG: worker IDs and iteration */
+	struct work_struct	offline_work;	/* offline the pool */
 
 	struct workqueue_attrs	*attrs;		/* I: worker attributes */
 	struct hlist_node	hash_node;	/* PL: unbound_pool_hash node */
@@ -1372,6 +1378,12 @@ retry:
 			  wq->name, cpu);
 	}
 
+	if (unlikely(pwq->pool->flags & POOL_OFFLINE) &&
+	    WARN_ON_ONCE(!is_chained_work(wq))) {
+		spin_unlock(&pwq->pool->lock);
+		return;
+	}
+
 	/* pwq determined, queue */
 	trace_workqueue_queue_work(req_cpu, pwq, work);
 
@@ -1784,7 +1796,7 @@ static void start_worker(struct worker *worker)
 }
 
 /**
- * create_and_start_worker - create and start a worker for a pool
+ * create_and_start_worker - create and start the initial worker for a pool
  * @pool: the target pool
  *
  * Grab the managership of @pool and create and start a new worker for it.
@@ -1798,6 +1810,7 @@ static int create_and_start_worker(struct worker_pool *pool)
 	worker = create_worker(pool);
 	if (worker) {
 		spin_lock_irq(&pool->lock);
+		pool->flags &= ~POOL_OFFLINE;
 		start_worker(worker);
 		spin_unlock_irq(&pool->lock);
 	}
@@ -2091,6 +2104,54 @@ static bool manage_workers(struct worker *worker)
 }
 
 /**
+ * offline_pool - try to offline a pool
+ * @work: embedded offline work item of the target pool
+ *
+ * Try to offline a pool by destroying all its workers.
+ *
+ * offline_pool() only destroys workers that are idle on the idle_list.
+ * If a worker leaves idle for any reason, it cannot be destroyed here,
+ * but the worker's worker_thread() will reschedule this work item, so
+ * offline_pool() may be called multiple times before the pool is fully
+ * offlined in this rare case.
+ *
+ * offline_pool() is always scheduled on system_unbound_wq, even for
+ * high-priority pools:
+ *  1) The pool of system_unbound_wq is always online.
+ *  2) The latency of offline_pool() doesn't matter.
+ */
+static void offline_pool(struct work_struct *work)
+{
+	struct worker_pool *pool;
+	struct worker *worker;
+
+	pool = container_of(work, struct worker_pool, offline_work);
+
+	mutex_lock(&pool->manager_mutex);
+	if (!(pool->flags & POOL_OFFLINE)) {
+		/* the pool is back online, cancel offline */
+		mutex_unlock(&pool->manager_mutex);
+		return;
+	}
+
+	spin_lock_irq(&pool->lock);
+	while (!list_empty(&pool->idle_list)) {
+		worker = list_first_entry(&pool->idle_list,
+					  struct worker, entry);
+		destroy_worker(worker);
+	}
+	spin_unlock_irq(&pool->lock);
+
+	mutex_unlock(&pool->manager_mutex);
+}
+
+static inline bool need_to_offline_pool(struct worker_pool *pool)
+{
+	return (pool->flags & POOL_OFFLINE) &&
+	       (pool->nr_workers == pool->nr_idle);
+}
+
+/**
  * process_one_work - process single work
  * @worker: self
  * @work: work to process
@@ -2251,6 +2312,7 @@ static int worker_thread(void *__worker)
 {
 	struct worker *worker = __worker;
 	struct worker_pool *pool = worker->pool;
+	bool pool_offline;
 
 	/* tell the scheduler that this is a workqueue worker */
 	worker->task->flags |= PF_WQ_WORKER;
@@ -2320,8 +2382,11 @@ sleep:
 	 * event.
 	 */
 	worker_enter_idle(worker);
+	pool_offline = need_to_offline_pool(pool);
 	__set_current_state(TASK_INTERRUPTIBLE);
 	spin_unlock_irq(&pool->lock);
+	if (pool_offline)
+		queue_work(system_unbound_wq, &pool->offline_work);
 	schedule();
 	goto woke_up;
 }
@@ -3451,6 +3516,7 @@ static int init_worker_pool(struct worker_pool *pool)
 	pool->cpu = -1;
 	pool->node = NUMA_NO_NODE;
 	pool->flags |= POOL_DISASSOCIATED;
+	pool->flags |= POOL_OFFLINE;
 	INIT_LIST_HEAD(&pool->worklist);
 	INIT_LIST_HEAD(&pool->idle_list);
 	hash_init(pool->busy_hash);
@@ -3465,6 +3531,7 @@ static int init_worker_pool(struct worker_pool *pool)
 	mutex_init(&pool->manager_arb);
 	mutex_init(&pool->manager_mutex);
 	idr_init(&pool->worker_idr);
+	INIT_WORK(&pool->offline_work, offline_pool);
 
 	INIT_HLIST_NODE(&pool->hash_node);
 	pool->refcnt = 1;
@@ -4702,6 +4769,7 @@ static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb,
 						 void *hcpu)
 {
 	int cpu = (unsigned long)hcpu;
+	struct worker_pool *pool;
 	struct work_struct unbind_work;
 	struct workqueue_struct *wq;
 
@@ -4720,6 +4788,19 @@ static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb,
 		/* wait for per-cpu unbinding to finish */
 		flush_work(&unbind_work);
 		break;
+	case CPU_UP_CANCELED:
+	case CPU_POST_DEAD:
+		for_each_cpu_worker_pool(pool, cpu) {
+			mutex_lock(&pool->manager_mutex);
+			spin_lock_irq(&pool->lock);
+
+			pool->flags |= POOL_OFFLINE;
+			wake_up_worker(pool);
+
+			spin_unlock_irq(&pool->lock);
+			mutex_unlock(&pool->manager_mutex);
+		}
+		break;
 	}
 	return NOTIFY_OK;
 }
-- 
1.7.4.4
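
(For reference: the "chained works" exception in the changelog above means a
work item queued by a worker that is itself currently executing a work item of
the same workqueue; such queueing must still be accepted even on a
POOL_OFFLINE pool. The sketch below shows roughly what the existing
is_chained_work() helper checks -- reconstructed from memory of the
workqueue.c of that era, not part of this patch:)

static bool is_chained_work(struct workqueue_struct *wq)
{
	struct worker *worker;

	/*
	 * Return %true iff the current task is a workqueue worker that is
	 * executing a work item of @wq; only such "chained" queueing is
	 * still allowed once the pool is marked POOL_OFFLINE.
	 */
	worker = current_wq_worker();
	return worker && worker->current_pwq->wq == wq;
}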



* Re: [PATCH] workqueue: clear workers of a pool after the CPU is offline
  2013-07-25 10:52 [PATCH] workqueue: clear workers of a pool after the CPU is offline Lai Jiangshan
@ 2013-07-25 15:31 ` Tejun Heo
  2013-07-26  2:13   ` Lai Jiangshan
From: Tejun Heo @ 2013-07-25 15:31 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: linux-kernel

Hello, Lai.

On Thu, Jul 25, 2013 at 06:52:02PM +0800, Lai Jiangshan wrote:
> The unbound pools and their workers can be destroyed/cleared
> when their refcnt becomes zero. But the per-cpu pools can't be
> destroyed because they are always referenced; their refcnt is
> always > 0.
> 
> We don't want to destroy the per-cpu pools themselves, but we do
> want to destroy the workers of a pool once it is fully idle after
> its CPU has gone offline. This was the default behavior in the old
> days, until we removed the trustee_thread().
> 
> We need a new way to restore this behavior, so we add
> offline_pool() and a POOL_OFFLINE flag to do so.

Hmmm... if I'm not confused, now the cpu pools just behave like a
normal unbound pool when the cpu goes down, which means that the idle
cpu workers will exit once idle timeout is reached, right?  I really
don't think it'd be worthwhile to add extra logic to accelerate the
process.

Note that there actually are benefits to doing it asynchronously as
CPUs go up and down very frequently on mobile platforms and destroying
idle workers as soon as possible would just mean that we'd be doing a
lot of work which isn't necessary.  I mean, we even grew an explicit
mechanism to park kthreads to avoid repeatedly creating and destroying
per-cpu kthreads as cpus go up and down.  I don't see any point in
adding code to go the other direction.

Thanks.

-- 
tejun
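
(For reference: the parking mechanism mentioned above is the
kthread_park()/kthread_parkme() API from <linux/kthread.h>, used by the
smpboot per-cpu thread infrastructure. A minimal sketch of the pattern,
assuming a generic per-cpu thread function -- illustrative only, not the
actual smpboot code:)

#include <linux/kthread.h>

static int my_percpu_thread(void *arg)
{
	while (!kthread_should_stop()) {
		if (kthread_should_park()) {
			/* sleep here while our CPU is offline, instead of exiting */
			kthread_parkme();
			continue;
		}
		/* ... do the per-cpu work ... */
	}
	return 0;
}

(On CPU down the hotplug code calls kthread_park() on the task, and on CPU up
it calls kthread_unpark(), so the thread is reused rather than destroyed and
recreated.)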


* Re: [PATCH] workqueue: clear workers of a pool after the CPU is offline
  2013-07-25 15:31 ` Tejun Heo
@ 2013-07-26  2:13   ` Lai Jiangshan
  2013-07-26  3:07     ` Tejun Heo
From: Lai Jiangshan @ 2013-07-26  2:13 UTC (permalink / raw)
  To: Tejun Heo; +Cc: linux-kernel

On 07/25/2013 11:31 PM, Tejun Heo wrote:
> Hello, Lai.
> 
> On Thu, Jul 25, 2013 at 06:52:02PM +0800, Lai Jiangshan wrote:
>> The unbound pools and their workers can be destroyed/cleared
>> when their refcnt becomes zero. But the per-cpu pools can't be
>> destroyed because they are always referenced; their refcnt is
>> always > 0.
>>
>> We don't want to destroy the per-cpu pools themselves, but we do
>> want to destroy the workers of a pool once it is fully idle after
>> its CPU has gone offline. This was the default behavior in the old
>> days, until we removed the trustee_thread().
>>
>> We need a new way to restore this behavior, so we add
>> offline_pool() and a POOL_OFFLINE flag to do so.
> 
> Hmmm... if I'm not confused, now the cpu pools just behave like a
> normal unbound pool when the cpu goes down,

cpu pools are always referenced, they don't behave like unbound pool.

> which means that the idle
> cpu workers will exit once idle timeout is reached, right? 

No, currently there is no code that forces the cpu workers to quit.
You can just offline a cpu and see what happens to its workers.

> I really
> don't think it'd be worthwhile to add extra logic to accelerate the
> process.
> 
> Note that there actually are benefits to doing it asynchronously as
> CPUs go up and down very frequently on mobile platforms and destroying
> idle workers as soon as possible would just mean that we'd be doing a
> lot of work which isn't necessary.  I mean, we even grew an explicit
> mechanism to park kthreads to avoid repeatedly creating and destroying
> per-cpu kthreads as cpus go up and down.  I don't see any point in
> adding code to go the other direction.
> 
> Thanks.
> 



* Re: [PATCH] workqueue: clear workers of a pool after the CPU is offline
  2013-07-26  2:13   ` Lai Jiangshan
@ 2013-07-26  3:07     ` Tejun Heo
  2013-07-26  3:47       ` Lai Jiangshan
From: Tejun Heo @ 2013-07-26  3:07 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: linux-kernel

Hello,

On Fri, Jul 26, 2013 at 10:13:25AM +0800, Lai Jiangshan wrote:
> > Hmmm... if I'm not confused, now the cpu pools just behave like a
> > normal unbound pool when the cpu goes down,
> 
> cpu pools are always referenced, they don't behave like unbound pool.

Yeah sure, they don't get destroyed but pool management functions the
same.

> > which means that the idle
> > cpu workers will exit once idle timeout is reached, right? 
> 
> No, currently there is no code that forces the cpu workers to quit.
> You can just offline a cpu and see what happens to its workers.

Hmmm?  The idle timer thing doesn't work?  Why?

-- 
tejun


* Re: [PATCH] workqueue: clear workers of a pool after the CPU is offline
  2013-07-26  3:07     ` Tejun Heo
@ 2013-07-26  3:47       ` Lai Jiangshan
  2013-07-26 10:22         ` Tejun Heo
From: Lai Jiangshan @ 2013-07-26  3:47 UTC (permalink / raw)
  To: Tejun Heo; +Cc: linux-kernel

On 07/26/2013 11:07 AM, Tejun Heo wrote:
> Hello,
> 
> On Fri, Jul 26, 2013 at 10:13:25AM +0800, Lai Jiangshan wrote:
>>> Hmmm... if I'm not confused, now the cpu pools just behave like a
>>> normal unbound pool when the cpu goes down,
>>
>> cpu pools are always referenced, they don't behave like unbound pool.
> 
> Yeah sure, they don't get destroyed but pool management functions the
> same.
> 
>>> which means that the idle
>>> cpu workers will exit once idle timeout is reached, right? 
>>
>> No, currently there is no code that forces the cpu workers to quit.
>> You can just offline a cpu and see what happens to its workers.
> 
> Hmmm?  The idle timer thing doesn't work?  Why?
> 

A worker can't kill itself, and the manager always tries to leave
2 workers in the pool.

So the workers of an offline cpu pool can't be totally destroyed.

(In the old days we also had the idle timer, but the last workers were
killed by the trustee_thread().)
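
(For reference, the "leave 2 workers" policy comes from too_many_workers():
the idle timer and the manage_workers() path only reap workers while it
returns true, so the last two idle workers always survive. A rough sketch of
that check, reconstructed from memory of the workqueue.c of that era rather
than quoted from it:)

static bool too_many_workers(struct worker_pool *pool)
{
	bool managing = mutex_is_locked(&pool->manager_arb);
	int nr_idle = pool->nr_idle + managing;	/* the manager counts as idle */
	int nr_busy = pool->nr_workers - nr_idle;

	/*
	 * False once only two workers are idle, so the idle reaper never
	 * destroys the last two workers of a pool.
	 */
	return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
}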


* Re: [PATCH] workqueue: clear workers of a pool after the CPU is offline
  2013-07-26  3:47       ` Lai Jiangshan
@ 2013-07-26 10:22         ` Tejun Heo
  2013-07-26 16:55           ` Lai Jiangshan
From: Tejun Heo @ 2013-07-26 10:22 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: linux-kernel

On Fri, Jul 26, 2013 at 11:47:04AM +0800, Lai Jiangshan wrote:
> A worker can't kill itself, and the manager always tries to leave
> 2 workers in the pool.
> 
> So the workers of an offline cpu pool can't be totally destroyed.

But we *do* want to keep them around as CPUs taken offline are likely
to come online at some point and destroying all of them saves only
~16k of memory while adding more work while CPUs are on/offlined which
can be very frequent on mobile devices.  The change was *intentional*.

Thanks.

-- 
tejun


* Re: [PATCH] workqueue: clear workers of a pool after the CPU is offline
  2013-07-26 10:22         ` Tejun Heo
@ 2013-07-26 16:55           ` Lai Jiangshan
  2013-07-26 17:03             ` Tejun Heo
From: Lai Jiangshan @ 2013-07-26 16:55 UTC (permalink / raw)
  To: Tejun Heo, Lai Jiangshan; +Cc: linux-kernel

On Fri, Jul 26, 2013 at 6:22 PM, Tejun Heo <tj@kernel.org> wrote:
> On Fri, Jul 26, 2013 at 11:47:04AM +0800, Lai Jiangshan wrote:
>> A worker can't kill itself, and the manager always tries to leave
>> 2 workers in the pool.
>>
>> So the workers of an offline cpu pool can't be totally destroyed.
>
> But we *do* want to keep them around as CPUs taken offline are likely
> to come online at some point and destroying all of them saves only
> ~16k of memory while adding more work while CPUs are on/offlined which

4 threads (the normal and the high-priority per-cpu pool, 2 workers each),
so ~32k, assuming ~8k of kernel stack per thread.
It is still small.

> can be very frequent on mobile devices.  The change was *intentional*.

But sometimes a cpu stays offline for a long time,
and the administrator may want to reclaim the resources.

Add a boot option or sysfs switch?

>
> Thanks.
>
> --
> tejun


* Re: [PATCH] workqueue: clear workers of a pool after the CPU is offline
  2013-07-26 16:55           ` Lai Jiangshan
@ 2013-07-26 17:03             ` Tejun Heo
From: Tejun Heo @ 2013-07-26 17:03 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Lai Jiangshan, linux-kernel

On Sat, Jul 27, 2013 at 12:55:12AM +0800, Lai Jiangshan wrote:
> But sometimes a cpu stays offline for a long time,
> and the administrator may want to reclaim the resources.
> 
> Add a boot option or sysfs switch?

No, we don't do that to save 64k.

-- 
tejun


* [PATCH] workqueue: clear workers of a pool after the CPU is offline
@ 2013-02-20 17:04 Lai Jiangshan
From: Lai Jiangshan @ 2013-02-20 17:04 UTC (permalink / raw)
  To: Tejun Heo, linux-kernel; +Cc: Lai Jiangshan

After we removed the trustee_thread(), the workers of a pool can't
be cleared completely after the CPU goes offline.

And in the future we will introduce non-std pools, but we still
have no way to clear workers before we try to free those non-std pools.

We add offline_pool() and a POOL_OFFLINE flag to do so.

1) Before we try to clear the workers, we set POOL_OFFLINE on the pool.
   The pool then stops serving work: any work queued on it is rejected,
   except for chained works.
2) When all pending works have finished and all workers are idle, a worker
   thread schedules offline_pool() to clear the workers.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 kernel/workqueue.c |  109 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 98 insertions(+), 11 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0b1e6f2..ffdc1db 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -58,14 +58,18 @@ enum {
 	 * %WORKER_UNBOUND set and concurrency management disabled, and may
 	 * be executing on any CPU.  The pool behaves as an unbound one.
 	 *
-	 * Note that DISASSOCIATED can be flipped only while holding
-	 * assoc_mutex to avoid changing binding state while
+	 * OFFLINE is a further state beyond DISASSOCIATED: the cpu has finished
+	 * going offline and all workers will exit after they finish the last works.
+	 *
+	 * Note that DISASSOCIATED and OFFLINE can be flipped only while
+	 * holding assoc_mutex to avoid changing binding state while
 	 * create_worker() is in progress.
 	 */
 	POOL_MANAGE_WORKERS	= 1 << 0,	/* need to manage workers */
 	POOL_MANAGING_WORKERS   = 1 << 1,       /* managing workers */
-	POOL_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */
+	POOL_DISASSOCIATED	= 1 << 2,	/* pool dissociates its cpu */
 	POOL_FREEZING		= 1 << 3,	/* freeze in progress */
+	POOL_OFFLINE		= 1 << 4,	/* pool can't serve work */
 
 	/* worker flags */
 	WORKER_STARTED		= 1 << 0,	/* started */
@@ -143,6 +147,7 @@ struct worker_pool {
 						/* L: hash of busy workers */
 
 	struct mutex		assoc_mutex;	/* protect POOL_DISASSOCIATED */
+	struct work_struct	offline_work;	/* offline the pool */
 	struct ida		worker_ida;	/* L: for worker IDs */
 
 	/*
@@ -1228,6 +1233,12 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 		} else {
 			spin_lock(&pwq->pool->lock);
 		}
+
+		if (unlikely(pwq->pool->flags & POOL_OFFLINE) &&
+		    WARN_ON_ONCE(!is_chained_work(wq))) {
+			spin_unlock(&pwq->pool->lock);
+			return;
+		}
 	} else {
 		pwq = get_pwq(WORK_CPU_UNBOUND, wq);
 		spin_lock(&pwq->pool->lock);
@@ -2063,6 +2074,55 @@ static bool manage_workers(struct worker *worker)
 }
 
 /**
+ * offline_pool - try to offline a pool
+ * @work: embedded offline work item of the target pool
+ *
+ * Try to offline a pool by destroying all its workers.
+ * offline_pool() only destroys workers that are idle on the idle_list.
+ * If a worker leaves idle for any reason, it will not be destroyed here,
+ * but offline_pool() will be rescheduled via worker_thread() again in
+ * this case, so offline_pool() may be called multiple times before the
+ * offlining finishes in this rare case.
+ *
+ * offline_pool() is always scheduled on system_unbound_wq, even for
+ * high-priority pools:
+ *  1) The pool of system_unbound_wq is always online.
+ *  2) The latency of offline_pool() doesn't matter.
+ */
+static void offline_pool(struct work_struct *work)
+{
+	struct worker_pool *pool;
+	struct worker *worker;
+
+	pool = container_of(work, struct worker_pool, offline_work);
+
+	mutex_lock(&pool->assoc_mutex);
+	if (!(pool->flags & POOL_OFFLINE)) {
+		/* the pool is back, cancel offline */
+		mutex_unlock(&pool->assoc_mutex);
+		return;
+	}
+
+	spin_lock_irq(&pool->lock);
+	BUG_ON(!list_empty(&pool->worklist));
+
+	while (!list_empty(&pool->idle_list)) {
+		worker = list_first_entry(&pool->idle_list,
+					  struct worker, entry);
+		destroy_worker(worker);
+	}
+
+	spin_unlock_irq(&pool->lock);
+	mutex_unlock(&pool->assoc_mutex);
+}
+
+static inline bool need_to_offline_pool(struct worker_pool *pool)
+{
+	return (pool->flags & POOL_OFFLINE) &&
+	       (pool->nr_workers == pool->nr_idle);
+}
+
+/**
  * process_one_work - process single work
  * @worker: self
  * @work: work to process
@@ -2222,6 +2282,7 @@ static int worker_thread(void *__worker)
 {
 	struct worker *worker = __worker;
 	struct worker_pool *pool = worker->pool;
+	bool pool_offline;
 
 	/* tell the scheduler that this is a workqueue worker */
 	worker->task->flags |= PF_WQ_WORKER;
@@ -2296,8 +2357,11 @@ sleep:
 	 * event.
 	 */
 	worker_enter_idle(worker);
+	pool_offline = need_to_offline_pool(pool);
 	__set_current_state(TASK_INTERRUPTIBLE);
 	spin_unlock_irq(&pool->lock);
+	if (pool_offline)
+		queue_work(system_unbound_wq, &pool->offline_work);
 	schedule();
 	goto woke_up;
 }
@@ -3487,18 +3551,24 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_UP_PREPARE:
 		for_each_std_worker_pool(pool, cpu) {
-			struct worker *worker;
-
-			if (pool->nr_workers)
-				continue;
+			struct worker *worker = NULL;
 
-			worker = create_worker(pool);
-			if (!worker)
-				return NOTIFY_BAD;
+			mutex_lock(&pool->assoc_mutex);
+			if (!pool->nr_workers) {
+				worker = create_worker(pool);
+				if (!worker) {
+					mutex_unlock(&pool->assoc_mutex);
+					return NOTIFY_BAD;
+				}
+			}
 
 			spin_lock_irq(&pool->lock);
-			start_worker(worker);
+			pool->flags &= ~POOL_OFFLINE;
+			if (worker)
+				start_worker(worker);
+
 			spin_unlock_irq(&pool->lock);
+			mutex_unlock(&pool->assoc_mutex);
 		}
 		break;
 
@@ -3528,6 +3598,7 @@ static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb,
 						 void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
+	struct worker_pool *pool;
 	struct work_struct unbind_work;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
@@ -3537,6 +3608,19 @@ static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb,
 		queue_work_on(cpu, system_highpri_wq, &unbind_work);
 		flush_work(&unbind_work);
 		break;
+	case CPU_UP_CANCELED:
+	case CPU_POST_DEAD:
+		for_each_std_worker_pool(pool, cpu) {
+			mutex_lock(&pool->assoc_mutex);
+			spin_lock_irq(&pool->lock);
+
+			pool->flags |= POOL_OFFLINE;
+			wake_up_worker(pool);
+
+			spin_unlock_irq(&pool->lock);
+			mutex_unlock(&pool->assoc_mutex);
+		}
+		break;
 	}
 	return NOTIFY_OK;
 }
@@ -3740,6 +3824,7 @@ static int __init init_workqueues(void)
 		for_each_std_worker_pool(pool, cpu) {
 			spin_lock_init(&pool->lock);
 			pool->cpu = cpu;
+			pool->flags |= POOL_OFFLINE;
 			pool->flags |= POOL_DISASSOCIATED;
 			INIT_LIST_HEAD(&pool->worklist);
 			INIT_LIST_HEAD(&pool->idle_list);
@@ -3753,6 +3838,7 @@ static int __init init_workqueues(void)
 				    (unsigned long)pool);
 
 			mutex_init(&pool->assoc_mutex);
+			INIT_WORK(&pool->offline_work, offline_pool);
 			ida_init(&pool->worker_ida);
 
 			/* alloc pool ID */
@@ -3767,6 +3853,7 @@ static int __init init_workqueues(void)
 		for_each_std_worker_pool(pool, cpu) {
 			struct worker *worker;
 
+			pool->flags &= ~POOL_OFFLINE;
 			if (cpu != WORK_CPU_UNBOUND)
 				pool->flags &= ~POOL_DISASSOCIATED;
 
-- 
1.7.7.6



