linux-kernel.vger.kernel.org archive mirror
* [PATCH] workqueue: fix memory leak of struct pool_workqueue in destroy_workqueue()
@ 2021-08-12  8:38 lizhe.67
  2021-08-13 12:03 ` Lai Jiangshan
  0 siblings, 1 reply; 10+ messages in thread
From: lizhe.67 @ 2021-08-12  8:38 UTC (permalink / raw)
  To: tj, jiangshanlai; +Cc: linux-kernel, lizefan.x, Li Zhe

From: Li Zhe <lizhe.67@bytedance.com>

Even after commit def98c84b6cd, we may encounter sanity check failures in
destroy_workqueue() even though flush_work() was called beforehand, which
results in a memory leak of struct pool_workqueue.

The warning logs are listed below.

WARNING: CPU: 0 PID: 19336 at kernel/workqueue.c:4430 destroy_workqueue+0x11a/0x2f0
*****
destroy_workqueue: test_workqueue9 has the following busy pwq
  pwq 4: cpus=2 node=0 flags=0x0 nice=0 active=0/1 refcnt=2
      in-flight: 5658:wq_barrier_func
Showing busy workqueues and worker pools:
*****

A possible sequence of events that results in the failure is listed below.

thread A:
destroy_workqueue()
----raw_spin_lock_irq(&pwq->pool->lock)
----pwq_busy()

thread B:
----process_one_work()
----raw_spin_unlock_irq(&pool->lock)
----worker->current_func(work)
----cond_resched()
----raw_spin_lock_irq(&pool->lock)
----pwq_dec_nr_in_flight(pwq, work_color)

Thread A may acquire pool->lock before thread B does, while pwq->refcnt
is still 2, which results in the memory leak and the sanity check failure.
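
For reference, a minimal usage pattern that can hit this race looks
roughly like the following sketch (the names are illustrative, not taken
from the original reproducer):

	static void my_work_fn(struct work_struct *work)
	{
		/* any short-lived payload */
	}
	static DECLARE_WORK(my_work, my_work_fn);

	void trigger(void)
	{
		struct workqueue_struct *wq = alloc_workqueue("test_wq", 0, 0);

		queue_work(wq, &my_work);
		flush_work(&my_work);	/* queues a barrier work item */
		destroy_workqueue(wq);	/* may race with the barrier's
					 * pwq_dec_nr_in_flight() */
	}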

Notice that wq_barrier_func() only calls complete(), and expanding
struct work_struct is not suitable considering the memory cost.  This
patch moves the complete() call to after pool->lock is re-acquired in
process_one_work(), eliminating the race by identifying a barrier work
item as one whose work->func is NULL.

Signed-off-by: Li Zhe <lizhe.67@bytedance.com>
---
 kernel/workqueue.c | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f148eacda55a..02f77f35522c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -280,6 +280,12 @@ struct workqueue_struct {
 	struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */
 };
 
+struct wq_barrier {
+	struct work_struct	work;
+	struct completion	done;
+	struct task_struct	*task;	/* purely informational */
+};
+
 static struct kmem_cache *pwq_cache;
 
 static cpumask_var_t *wq_numa_possible_cpumask;
@@ -2152,6 +2158,11 @@ static bool manage_workers(struct worker *worker)
 	return true;
 }
 
+static inline bool is_barrier_func(work_func_t func)
+{
+	return func == NULL;
+}
+
 /**
  * process_one_work - process single work
  * @worker: self
@@ -2273,7 +2284,8 @@ __acquires(&pool->lock)
 	 */
 	lockdep_invariant_state(true);
 	trace_workqueue_execute_start(work);
-	worker->current_func(work);
+	if (likely(!is_barrier_func(worker->current_func)))
+		worker->current_func(work);
 	/*
 	 * While we must be careful to not use "work" after this, the trace
 	 * point will only record its address.
@@ -2303,6 +2315,11 @@ __acquires(&pool->lock)
 
 	raw_spin_lock_irq(&pool->lock);
 
+	if (unlikely(is_barrier_func(worker->current_func))) {
+		struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
+		complete(&barr->done);
+	}
+
 	/* clear cpu intensive status */
 	if (unlikely(cpu_intensive))
 		worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
@@ -2618,18 +2635,6 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
 		  target_wq->name, target_func);
 }
 
-struct wq_barrier {
-	struct work_struct	work;
-	struct completion	done;
-	struct task_struct	*task;	/* purely informational */
-};
-
-static void wq_barrier_func(struct work_struct *work)
-{
-	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
-	complete(&barr->done);
-}
-
 /**
  * insert_wq_barrier - insert a barrier work
  * @pwq: pwq to insert barrier into
@@ -2667,7 +2672,11 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
 	 * checks and call back into the fixup functions where we
 	 * might deadlock.
 	 */
-	INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
+	/* No need to init func because complete() has been moved to
+	 * process_one_work(), which means that we use a NULL func to
+	 * identify a barrier work item.
+	 */
+	INIT_WORK_ONSTACK(&barr->work, NULL);
 	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
 
 	init_completion_map(&barr->done, &target->lockdep_map);
@@ -4682,7 +4691,7 @@ static void pr_cont_pool_info(struct worker_pool *pool)
 
 static void pr_cont_work(bool comma, struct work_struct *work)
 {
-	if (work->func == wq_barrier_func) {
+	if (is_barrier_func(work->func)) {
 		struct wq_barrier *barr;
 
 		barr = container_of(work, struct wq_barrier, work);
-- 
2.11.0



* Re: [PATCH] workqueue: fix memory leak of struct pool_workqueue in destroy_workqueue()
  2021-08-12  8:38 [PATCH] workqueue: fix memory leak of struct pool_workqueue in destroy_workqueue() lizhe.67
@ 2021-08-13 12:03 ` Lai Jiangshan
  2021-08-17  1:32   ` [PATCH 0/6] workqueue: Make flush_workqueue() also watch flush_work() Lai Jiangshan
  0 siblings, 1 reply; 10+ messages in thread
From: Lai Jiangshan @ 2021-08-13 12:03 UTC (permalink / raw)
  To: lizhe.67; +Cc: Tejun Heo, LKML, lizefan.x

On Thu, Aug 12, 2021 at 4:38 PM <lizhe.67@bytedance.com> wrote:
>
> From: Li Zhe <lizhe.67@bytedance.com>
>
> Even after commit def98c84b6cd, we may encounter sanity check failures in
> destroy_workqueue() even though flush_work() was called beforehand, which
> results in a memory leak of struct pool_workqueue.
>
> The warning logs are listed below.
>
> WARNING: CPU: 0 PID: 19336 at kernel/workqueue.c:4430 destroy_workqueue+0x11a/0x2f0
> *****
> destroy_workqueue: test_workqueue9 has the following busy pwq
>   pwq 4: cpus=2 node=0 flags=0x0 nice=0 active=0/1 refcnt=2
>       in-flight: 5658:wq_barrier_func
> Showing busy workqueues and worker pools:
> *****
>
> A possible sequence of events that results in the failure is listed below.
>
> thread A:
> destroy_workqueue()
> ----raw_spin_lock_irq(&pwq->pool->lock)
> ----pwq_busy()
>
> thread B:
> ----process_one_work()
> ----raw_spin_unlock_irq(&pool->lock)
> ----worker->current_func(work)
> ----cond_resched()
> ----raw_spin_lock_irq(&pool->lock)
> ----pwq_dec_nr_in_flight(pwq, work_color)

Hello, Li

Thanks for your report.
As this list of events shows, the problem does exist.

But complicating process_one_work() and adding branches to its hot path
is not ideal.  I'm trying to figure out another way to fix it.

Thanks
Lai



* [PATCH 0/6] workqueue: Make flush_workqueue() also watch flush_work()
  2021-08-13 12:03 ` Lai Jiangshan
@ 2021-08-17  1:32   ` Lai Jiangshan
  2021-08-17  1:32     ` [PATCH 1/6] workqueue: Rename "delayed" (delayed by active management) to "inactive" Lai Jiangshan
                       ` (6 more replies)
  0 siblings, 7 replies; 10+ messages in thread
From: Lai Jiangshan @ 2021-08-17  1:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: lizefan.x, lizhe.67, Tejun Heo, Lai Jiangshan, Lai Jiangshan

From: Lai Jiangshan <laijs@linux.alibaba.com>

Li reported a problem[1] where the sanity check in destroy_workqueue() catches
a WARNING because drain_workqueue() doesn't wait for flush_work() to finish.

The warning logs are listed below:

WARNING: CPU: 0 PID: 19336 at kernel/workqueue.c:4430 destroy_workqueue+0x11a/0x2f0
*****
destroy_workqueue: test_workqueue9 has the following busy pwq
  pwq 4: cpus=2 node=0 flags=0x0 nice=0 active=0/1 refcnt=2
      in-flight: 5658:wq_barrier_func
Showing busy workqueues and worker pools:
*****

It shows that even after drain_workqueue() returns, the barrier work item
is still in flight and the pwq (and a worker) is still busy on it.

The problem is caused by drain_workqueue() not watching flush_work():

Thread A				Worker
					/* normal work item with linked */
					process_scheduled_works()
destroy_workqueue()			  process_one_work()
  drain_workqueue()			    /* run normal work item */
				 /--	    pwq_dec_nr_in_flight()
    flush_workqueue()	    <---/
		/* the last normal work item is done */
  sanity_check				  process_one_work()
				       /--  raw_spin_unlock_irq(&pool->lock)
    raw_spin_lock_irq(&pool->lock)  <-/     /* maybe preempt */
    *WARNING*				    wq_barrier_func()
					    /* maybe preempt by cond_resched() */

So the solution is to make drain_workqueue() watch for flush_work(), which
means making flush_workqueue() watch for flush_work().
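
For context, drain_workqueue() leans entirely on flush_workqueue(); it is
roughly the following retry loop (a simplified sketch of the existing
code, using the post-rename field names from patch 1):

	void drain_workqueue(struct workqueue_struct *wq)
	{
		struct pool_workqueue *pwq;
	reflush:
		flush_workqueue(wq);
		for_each_pwq(pwq, wq) {
			bool drained;

			raw_spin_lock_irq(&pwq->pool->lock);
			drained = !pwq->nr_active &&
				  list_empty(&pwq->inactive_works);
			raw_spin_unlock_irq(&pwq->pool->lock);

			if (!drained)
				goto reflush;
		}
	}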

Due to historical convenience, we used WORK_NO_COLOR for barrier work items
queued by flush_work().  The color has two purposes:
	Not participating in flushing
	Not participating in nr_active

Only the second purpose is obligatory.  So the plan is to mark barrier
work items inactive without using WORK_NO_COLOR in patch4 so that we can
assign a flushing color to them in patch5.

Patches 1-3 are preparation, and patch 6 is a cleanup.

[1]: https://lore.kernel.org/lkml/20210812083814.32453-1-lizhe.67@bytedance.com/

Lai Jiangshan (6):
  workqueue: Rename "delayed" (delayed by active management) to
    "inactive"
  workqueue: Change argument of pwq_dec_nr_in_flight()
  workqueue: Change the code of calculating work_flags in
    insert_wq_barrier()
  workqueue: Mark barrier work with WORK_STRUCT_INACTIVE
  workqueue: Assign a color to barrier work items
  workqueue: Remove unused WORK_NO_COLOR

 include/linux/workqueue.h   |  13 ++--
 kernel/workqueue.c          | 133 ++++++++++++++++++++++--------------
 kernel/workqueue_internal.h |   3 +-
 3 files changed, 88 insertions(+), 61 deletions(-)

-- 
2.19.1.6.gb485710b



* [PATCH 1/6] workqueue: Rename "delayed" (delayed by active management) to "inactive"
  2021-08-17  1:32   ` [PATCH 0/6] workqueue: Make flush_workqueue() also watch flush_work() Lai Jiangshan
@ 2021-08-17  1:32     ` Lai Jiangshan
  2021-08-17  1:32     ` [PATCH 2/6] workqueue: Change argument of pwq_dec_nr_in_flight() Lai Jiangshan
                       ` (5 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Lai Jiangshan @ 2021-08-17  1:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: lizefan.x, lizhe.67, Tejun Heo, Lai Jiangshan, Lai Jiangshan

From: Lai Jiangshan <laijs@linux.alibaba.com>

There are two kinds of "delayed" work items in the workqueue subsystem.

One is for timer-delayed work items, which are visible to workqueue users.
The other kind is for work items delayed by active management, which are
not directly visible to workqueue users.  We used the word "delayed" for
both kinds, which caused some ambiguity.

This patch renames the latter kind (delayed by active management) to
"inactive", because it is used for workqueue active management and
most of its related symbols are named with "active" or "activate".
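
For contrast, the user-visible, timer-delayed kind keeps its name.  A
trivial illustration (wq and dwork are assumed to exist):

	/* Timer-delayed: visible to users, still called "delayed". */
	queue_delayed_work(wq, &dwork, HZ);

	/*
	 * Delayed by active management: now called "inactive".  Such a
	 * work item sits on pwq->inactive_works until nr_active drops
	 * below max_active, invisible to workqueue users.
	 */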

All "delayed" and "DELAYED" are carefully checked and renamed one by
one to avoid accidentally changing the name of the other kind for
timer-delayed.

No functional change intended.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 include/linux/workqueue.h |  4 +--
 kernel/workqueue.c        | 58 +++++++++++++++++++--------------------
 2 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index d15a7730ee18..38e479bcdbfe 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -29,7 +29,7 @@ void delayed_work_timer_fn(struct timer_list *t);
 
 enum {
 	WORK_STRUCT_PENDING_BIT	= 0,	/* work item is pending execution */
-	WORK_STRUCT_DELAYED_BIT	= 1,	/* work item is delayed */
+	WORK_STRUCT_INACTIVE_BIT= 1,	/* work item is inactive */
 	WORK_STRUCT_PWQ_BIT	= 2,	/* data points to pwq */
 	WORK_STRUCT_LINKED_BIT	= 3,	/* next work is linked to this one */
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
@@ -42,7 +42,7 @@ enum {
 	WORK_STRUCT_COLOR_BITS	= 4,
 
 	WORK_STRUCT_PENDING	= 1 << WORK_STRUCT_PENDING_BIT,
-	WORK_STRUCT_DELAYED	= 1 << WORK_STRUCT_DELAYED_BIT,
+	WORK_STRUCT_INACTIVE	= 1 << WORK_STRUCT_INACTIVE_BIT,
 	WORK_STRUCT_PWQ		= 1 << WORK_STRUCT_PWQ_BIT,
 	WORK_STRUCT_LINKED	= 1 << WORK_STRUCT_LINKED_BIT,
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 50142fc08902..b2712807c403 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -207,7 +207,7 @@ struct pool_workqueue {
 						/* L: nr of in_flight works */
 	int			nr_active;	/* L: nr of active works */
 	int			max_active;	/* L: max active works */
-	struct list_head	delayed_works;	/* L: delayed works */
+	struct list_head	inactive_works;	/* L: inactive works */
 	struct list_head	pwqs_node;	/* WR: node on wq->pwqs */
 	struct list_head	mayday_node;	/* MD: node on wq->maydays */
 
@@ -1136,7 +1136,7 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
 	}
 }
 
-static void pwq_activate_delayed_work(struct work_struct *work)
+static void pwq_activate_inactive_work(struct work_struct *work)
 {
 	struct pool_workqueue *pwq = get_work_pwq(work);
 
@@ -1144,16 +1144,16 @@ static void pwq_activate_delayed_work(struct work_struct *work)
 	if (list_empty(&pwq->pool->worklist))
 		pwq->pool->watchdog_ts = jiffies;
 	move_linked_works(work, &pwq->pool->worklist, NULL);
-	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
+	__clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
 	pwq->nr_active++;
 }
 
-static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
+static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
 {
-	struct work_struct *work = list_first_entry(&pwq->delayed_works,
+	struct work_struct *work = list_first_entry(&pwq->inactive_works,
 						    struct work_struct, entry);
 
-	pwq_activate_delayed_work(work);
+	pwq_activate_inactive_work(work);
 }
 
 /**
@@ -1176,10 +1176,10 @@ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
 	pwq->nr_in_flight[color]--;
 
 	pwq->nr_active--;
-	if (!list_empty(&pwq->delayed_works)) {
-		/* one down, submit a delayed one */
+	if (!list_empty(&pwq->inactive_works)) {
+		/* one down, submit an inactive one */
 		if (pwq->nr_active < pwq->max_active)
-			pwq_activate_first_delayed(pwq);
+			pwq_activate_first_inactive(pwq);
 	}
 
 	/* is flush in progress and are we at the flushing tip? */
@@ -1281,14 +1281,14 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 		debug_work_deactivate(work);
 
 		/*
-		 * A delayed work item cannot be grabbed directly because
+		 * An inactive work item cannot be grabbed directly because
 		 * it might have linked NO_COLOR work items which, if left
-		 * on the delayed_list, will confuse pwq->nr_active
+		 * on the inactive_works list, will confuse pwq->nr_active
 		 * management later on and cause stall.  Make sure the work
 		 * item is activated before grabbing.
 		 */
-		if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
-			pwq_activate_delayed_work(work);
+		if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
+			pwq_activate_inactive_work(work);
 
 		list_del_init(&work->entry);
 		pwq_dec_nr_in_flight(pwq, get_work_color(work));
@@ -1490,8 +1490,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 		if (list_empty(worklist))
 			pwq->pool->watchdog_ts = jiffies;
 	} else {
-		work_flags |= WORK_STRUCT_DELAYED;
-		worklist = &pwq->delayed_works;
+		work_flags |= WORK_STRUCT_INACTIVE;
+		worklist = &pwq->inactive_works;
 	}
 
 	debug_work_activate(work);
@@ -2531,7 +2531,7 @@ static int rescuer_thread(void *__rescuer)
 			/*
 			 * The above execution of rescued work items could
 			 * have created more to rescue through
-			 * pwq_activate_first_delayed() or chained
+			 * pwq_activate_first_inactive() or chained
 			 * queueing.  Let's put @pwq back on mayday list so
 			 * that such back-to-back work items, which may be
 			 * being used to relieve memory pressure, don't
@@ -2957,7 +2957,7 @@ void drain_workqueue(struct workqueue_struct *wq)
 		bool drained;
 
 		raw_spin_lock_irq(&pwq->pool->lock);
-		drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
+		drained = !pwq->nr_active && list_empty(&pwq->inactive_works);
 		raw_spin_unlock_irq(&pwq->pool->lock);
 
 		if (drained)
@@ -3707,7 +3707,7 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
  * @pwq: target pool_workqueue
  *
  * If @pwq isn't freezing, set @pwq->max_active to the associated
- * workqueue's saved_max_active and activate delayed work items
+ * workqueue's saved_max_active and activate inactive work items
  * accordingly.  If @pwq is freezing, clear @pwq->max_active to zero.
  */
 static void pwq_adjust_max_active(struct pool_workqueue *pwq)
@@ -3736,9 +3736,9 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
 
 		pwq->max_active = wq->saved_max_active;
 
-		while (!list_empty(&pwq->delayed_works) &&
+		while (!list_empty(&pwq->inactive_works) &&
 		       pwq->nr_active < pwq->max_active) {
-			pwq_activate_first_delayed(pwq);
+			pwq_activate_first_inactive(pwq);
 			kick = true;
 		}
 
@@ -3769,7 +3769,7 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
 	pwq->wq = wq;
 	pwq->flush_color = -1;
 	pwq->refcnt = 1;
-	INIT_LIST_HEAD(&pwq->delayed_works);
+	INIT_LIST_HEAD(&pwq->inactive_works);
 	INIT_LIST_HEAD(&pwq->pwqs_node);
 	INIT_LIST_HEAD(&pwq->mayday_node);
 	INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
@@ -4356,7 +4356,7 @@ static bool pwq_busy(struct pool_workqueue *pwq)
 
 	if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
 		return true;
-	if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+	if (pwq->nr_active || !list_empty(&pwq->inactive_works))
 		return true;
 
 	return false;
@@ -4552,7 +4552,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
 	else
 		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
 
-	ret = !list_empty(&pwq->delayed_works);
+	ret = !list_empty(&pwq->inactive_works);
 	preempt_enable();
 	rcu_read_unlock();
 
@@ -4748,11 +4748,11 @@ static void show_pwq(struct pool_workqueue *pwq)
 		pr_cont("\n");
 	}
 
-	if (!list_empty(&pwq->delayed_works)) {
+	if (!list_empty(&pwq->inactive_works)) {
 		bool comma = false;
 
-		pr_info("    delayed:");
-		list_for_each_entry(work, &pwq->delayed_works, entry) {
+		pr_info("    inactive:");
+		list_for_each_entry(work, &pwq->inactive_works, entry) {
 			pr_cont_work(comma, work);
 			comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
 		}
@@ -4782,7 +4782,7 @@ void show_workqueue_state(void)
 		bool idle = true;
 
 		for_each_pwq(pwq, wq) {
-			if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
+			if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
 				idle = false;
 				break;
 			}
@@ -4794,7 +4794,7 @@ void show_workqueue_state(void)
 
 		for_each_pwq(pwq, wq) {
 			raw_spin_lock_irqsave(&pwq->pool->lock, flags);
-			if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+			if (pwq->nr_active || !list_empty(&pwq->inactive_works))
 				show_pwq(pwq);
 			raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
 			/*
@@ -5177,7 +5177,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu_safe);
  * freeze_workqueues_begin - begin freezing workqueues
  *
  * Start freezing workqueues.  After this function returns, all freezable
- * workqueues will queue new works to their delayed_works list instead of
+ * workqueues will queue new works to their inactive_works list instead of
  * pool->worklist.
  *
  * CONTEXT:
-- 
2.19.1.6.gb485710b



* [PATCH 2/6] workqueue: Change argument of pwq_dec_nr_in_flight()
  2021-08-17  1:32   ` [PATCH 0/6] workqueue: Make flush_workqueue() also watch flush_work() Lai Jiangshan
  2021-08-17  1:32     ` [PATCH 1/6] workqueue: Rename "delayed" (delayed by active management) to "inactive" Lai Jiangshan
@ 2021-08-17  1:32     ` Lai Jiangshan
  2021-08-17  1:32     ` [PATCH 3/6] workqueue: Change the code of calculating work_flags in insert_wq_barrier() Lai Jiangshan
                       ` (4 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Lai Jiangshan @ 2021-08-17  1:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: lizefan.x, lizhe.67, Tejun Heo, Lai Jiangshan, Lai Jiangshan

From: Lai Jiangshan <laijs@linux.alibaba.com>

Make pwq_dec_nr_in_flight() use work_data rather than just work_color.

Prepare for a later patch to get the WORK_STRUCT_INACTIVE bit from
work_data in pwq_dec_nr_in_flight().
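
As a sketch of the intent: the flushing color and (after patch 4) the
WORK_STRUCT_INACTIVE bit can both be read from the same snapshot of the
work data word:

	unsigned long work_data = *work_data_bits(work);
	int color = get_work_color(work_data);
	/* after patch 4: */
	bool inactive = work_data & WORK_STRUCT_INACTIVE;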

No functional change intended.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 kernel/workqueue.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b2712807c403..0a4fae7cb918 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -579,9 +579,9 @@ static unsigned int work_color_to_flags(int color)
 	return color << WORK_STRUCT_COLOR_SHIFT;
 }
 
-static int get_work_color(struct work_struct *work)
+static int get_work_color(unsigned long work_data)
 {
-	return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
+	return (work_data >> WORK_STRUCT_COLOR_SHIFT) &
 		((1 << WORK_STRUCT_COLOR_BITS) - 1);
 }
 
@@ -1159,7 +1159,7 @@ static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
 /**
  * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
  * @pwq: pwq of interest
- * @color: color of work which left the queue
+ * @work_data: work_data of work which left the queue
  *
  * A work either has completed or is removed from pending queue,
  * decrement nr_in_flight of its pwq and handle workqueue flushing.
@@ -1167,8 +1167,10 @@ static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
  * CONTEXT:
  * raw_spin_lock_irq(pool->lock).
  */
-static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
+static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_data)
 {
+	int color = get_work_color(work_data);
+
 	/* uncolored work items don't participate in flushing or nr_active */
 	if (color == WORK_NO_COLOR)
 		goto out_put;
@@ -1291,7 +1293,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 			pwq_activate_inactive_work(work);
 
 		list_del_init(&work->entry);
-		pwq_dec_nr_in_flight(pwq, get_work_color(work));
+		pwq_dec_nr_in_flight(pwq, *work_data_bits(work));
 
 		/* work->data points to pwq iff queued, point to pool */
 		set_work_pool_and_keep_pending(work, pool->id);
@@ -2173,7 +2175,7 @@ __acquires(&pool->lock)
 	struct pool_workqueue *pwq = get_work_pwq(work);
 	struct worker_pool *pool = worker->pool;
 	bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
-	int work_color;
+	unsigned long work_data;
 	struct worker *collision;
 #ifdef CONFIG_LOCKDEP
 	/*
@@ -2209,7 +2211,7 @@ __acquires(&pool->lock)
 	worker->current_work = work;
 	worker->current_func = work->func;
 	worker->current_pwq = pwq;
-	work_color = get_work_color(work);
+	work_data = *work_data_bits(work);
 
 	/*
 	 * Record wq name for cmdline and debug reporting, may get
@@ -2315,7 +2317,7 @@ __acquires(&pool->lock)
 	worker->current_work = NULL;
 	worker->current_func = NULL;
 	worker->current_pwq = NULL;
-	pwq_dec_nr_in_flight(pwq, work_color);
+	pwq_dec_nr_in_flight(pwq, work_data);
 }
 
 /**
-- 
2.19.1.6.gb485710b



* [PATCH 3/6] workqueue: Change the code of calculating work_flags in insert_wq_barrier()
  2021-08-17  1:32   ` [PATCH 0/6] workqueue: Make flush_workqueue() also watch flush_work() Lai Jiangshan
  2021-08-17  1:32     ` [PATCH 1/6] workqueue: Rename "delayed" (delayed by active management) to "inactive" Lai Jiangshan
  2021-08-17  1:32     ` [PATCH 2/6] workqueue: Change argument of pwq_dec_nr_in_flight() Lai Jiangshan
@ 2021-08-17  1:32     ` Lai Jiangshan
  2021-08-17  1:32     ` [PATCH 4/6] workqueue: Mark barrier work with WORK_STRUCT_INACTIVE Lai Jiangshan
                       ` (3 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Lai Jiangshan @ 2021-08-17  1:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: lizefan.x, lizhe.67, Tejun Heo, Lai Jiangshan, Lai Jiangshan

From: Lai Jiangshan <laijs@linux.alibaba.com>

Add a local variable @work_flags to build up work_flags step by step, so
that we don't need to squeeze several flags into the last line of code.

Prepare for the next patch to add a bit to the barrier work item's flags.
Not squashing this into the next patch makes it clear what that patch
changes.

No functional change intended.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 kernel/workqueue.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0a4fae7cb918..55fc2d1688d9 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2660,8 +2660,8 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
 			      struct wq_barrier *barr,
 			      struct work_struct *target, struct worker *worker)
 {
+	unsigned int work_flags = work_color_to_flags(WORK_NO_COLOR);
 	struct list_head *head;
-	unsigned int linked = 0;
 
 	/*
 	 * debugobject calls are safe here even with pool->lock locked
@@ -2687,13 +2687,12 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
 
 		head = target->entry.next;
 		/* there can already be other linked works, inherit and set */
-		linked = *bits & WORK_STRUCT_LINKED;
+		work_flags |= *bits & WORK_STRUCT_LINKED;
 		__set_bit(WORK_STRUCT_LINKED_BIT, bits);
 	}
 
 	debug_work_activate(&barr->work);
-	insert_work(pwq, &barr->work, head,
-		    work_color_to_flags(WORK_NO_COLOR) | linked);
+	insert_work(pwq, &barr->work, head, work_flags);
 }
 
 /**
-- 
2.19.1.6.gb485710b



* [PATCH 4/6] workqueue: Mark barrier work with WORK_STRUCT_INACTIVE
  2021-08-17  1:32   ` [PATCH 0/6] workqueue: Make flush_workqueue() also watch flush_work() Lai Jiangshan
                       ` (2 preceding siblings ...)
  2021-08-17  1:32     ` [PATCH 3/6] workqueue: Change the code of calculating work_flags in insert_wq_barrier() Lai Jiangshan
@ 2021-08-17  1:32     ` Lai Jiangshan
  2021-08-17  1:32     ` [PATCH 5/6] workqueue: Assign a color to barrier work items Lai Jiangshan
                       ` (2 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Lai Jiangshan @ 2021-08-17  1:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: lizefan.x, lizhe.67, Tejun Heo, Lai Jiangshan, Lai Jiangshan

From: Lai Jiangshan <laijs@linux.alibaba.com>

Currently, WORK_NO_COLOR has two meanings:
	Not participating in flushing
	Not participating in nr_active

And only non-barrier work items are marked with WORK_STRUCT_INACTIVE
when they are on the inactive_works list.  Barrier work items are not
marked INACTIVE even when linked into the inactive_works list, since
these tail items are always moved together with the head work item.

These definitions are simple, clean and practical (except for a small
blemish: only the first meaning of WORK_NO_COLOR is documented in
include/linux/workqueue.h, while both meanings are relied upon in
workqueue.c).

But the dual-purpose WORK_NO_COLOR used for barrier work items has proven
to be problematic[1].  Only the second purpose is obligatory.  So we plan
to make barrier work items participate in flushing while still not
participating in nr_active.

So the plan is to mark barrier work items inactive without using
WORK_NO_COLOR in this patch so that we can assign a flushing color to
them in the next patch.

The reasonable way is to add or reuse a bit in work data of the work
item.  But adding a bit will double the size of pool_workqueue.

Currently, WORK_STRUCT_INACTIVE is only used in try_to_grab_pending()
for user-queued work items, and try_to_grab_pending() doesn't operate on
barrier work items.  So we extend WORK_STRUCT_INACTIVE to also mark
barrier work items no matter which list they are in, because we don't
need to determine which list a barrier work item is in.

So the meaning of WORK_STRUCT_INACTIVE becomes just "the work item doesn't
participate in nr_active" (no matter whether it is a barrier work item or
a user-queued work item).  And for user-queued work items,
WORK_STRUCT_INACTIVE means they are on the inactive_works list.
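
Stated as a predicate (a sketch only; counts_in_nr_active() is a
hypothetical helper for illustration, not part of this series):

	/* A work item participates in nr_active iff INACTIVE is clear. */
	static inline bool counts_in_nr_active(unsigned long work_data)
	{
		return !(work_data & WORK_STRUCT_INACTIVE);
	}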

This patch does it by setting WORK_STRUCT_INACTIVE for barrier work items
in insert_wq_barrier() and checking WORK_STRUCT_INACTIVE first in
pwq_dec_nr_in_flight().  And the meaning of WORK_NO_COLOR is reduced to
only "not participating in flushing".

No functional change is intended in this patch, because
WORK_NO_COLOR+WORK_STRUCT_INACTIVE carries the same meaning as the
previous WORK_NO_COLOR, and try_to_grab_pending() is not used for barrier
work items and so avoids being confused by the extended
WORK_STRUCT_INACTIVE.

A bunch of comments about nr_active & WORK_STRUCT_INACTIVE are also added
to document how WORK_STRUCT_INACTIVE works in nr_active management.

[1]: https://lore.kernel.org/lkml/20210812083814.32453-1-lizhe.67@bytedance.com/
Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 kernel/workqueue.c | 42 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 34 insertions(+), 8 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 55fc2d1688d9..1b2792b397f0 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -205,6 +205,23 @@ struct pool_workqueue {
 	int			refcnt;		/* L: reference count */
 	int			nr_in_flight[WORK_NR_COLORS];
 						/* L: nr of in_flight works */
+
+	/*
+	 * nr_active management and WORK_STRUCT_INACTIVE:
+	 *
+	 * When pwq->nr_active >= max_active, new work item is queued to
+	 * pwq->inactive_works instead of pool->worklist and marked with
+	 * WORK_STRUCT_INACTIVE.
+	 *
+	 * All work items marked with WORK_STRUCT_INACTIVE do not participate
+	 * in pwq->nr_active and all work items in pwq->inactive_works are
+	 * marked with WORK_STRUCT_INACTIVE.  But not all WORK_STRUCT_INACTIVE
+	 * work items are in pwq->inactive_works.  Some of them are ready to
+	 * run in pool->worklist or worker->scheduled.  Those work items are
+	 * only struct wq_barrier which is used for flush_work() and should
+	 * not participate in pwq->nr_active.  For a non-barrier work item, it
+	 * is marked with WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
+	 */
 	int			nr_active;	/* L: nr of active works */
 	int			max_active;	/* L: max active works */
 	struct list_head	inactive_works;	/* L: inactive works */
@@ -1171,19 +1188,21 @@ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_
 {
 	int color = get_work_color(work_data);
 
-	/* uncolored work items don't participate in flushing or nr_active */
+	if (!(work_data & WORK_STRUCT_INACTIVE)) {
+		pwq->nr_active--;
+		if (!list_empty(&pwq->inactive_works)) {
+			/* one down, submit an inactive one */
+			if (pwq->nr_active < pwq->max_active)
+				pwq_activate_first_inactive(pwq);
+		}
+	}
+
+	/* uncolored work items don't participate in flushing */
 	if (color == WORK_NO_COLOR)
 		goto out_put;
 
 	pwq->nr_in_flight[color]--;
 
-	pwq->nr_active--;
-	if (!list_empty(&pwq->inactive_works)) {
-		/* one down, submit an inactive one */
-		if (pwq->nr_active < pwq->max_active)
-			pwq_activate_first_inactive(pwq);
-	}
-
 	/* is flush in progress and are we at the flushing tip? */
 	if (likely(pwq->flush_color != color))
 		goto out_put;
@@ -1283,6 +1302,10 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 		debug_work_deactivate(work);
 
 		/*
+		 * A cancelable inactive work item must be in the
+		 * pwq->inactive_works since a queued barrier can't be
+		 * canceled (see the comments in insert_wq_barrier()).
+		 *
 		 * An inactive work item cannot be grabbed directly because
 		 * it might have linked NO_COLOR work items which, if left
 		 * on the inactive_works list, will confuse pwq->nr_active
@@ -2676,6 +2699,9 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
 
 	barr->task = current;
 
+	/* The barrier work item does not participate in pwq->nr_active. */
+	work_flags |= WORK_STRUCT_INACTIVE;
+
 	/*
 	 * If @target is currently being executed, schedule the
 	 * barrier to the worker; otherwise, put it after @target.
-- 
2.19.1.6.gb485710b



* [PATCH 5/6] workqueue: Assign a color to barrier work items
  2021-08-17  1:32   ` [PATCH 0/6] workqueue: Make flush_workqueue() also watch flush_work() Lai Jiangshan
                       ` (3 preceding siblings ...)
  2021-08-17  1:32     ` [PATCH 4/6] workqueue: Mark barrier work with WORK_STRUCT_INACTIVE Lai Jiangshan
@ 2021-08-17  1:32     ` Lai Jiangshan
  2021-08-17  1:32     ` [PATCH 6/6] workqueue: Remove unused WORK_NO_COLOR Lai Jiangshan
  2021-08-17 17:48     ` [PATCH 0/6] workqueue: Make flush_workqueue() also watch flush_work() Tejun Heo
  6 siblings, 0 replies; 10+ messages in thread
From: Lai Jiangshan @ 2021-08-17  1:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: lizefan.x, lizhe.67, Tejun Heo, Lai Jiangshan, Lai Jiangshan

From: Lai Jiangshan <laijs@linux.alibaba.com>

There was no strong reason either to flush or not to flush barrier work
items in flush_workqueue().  But we have to make barrier work items not
participate in nr_active, so we had been using WORK_NO_COLOR for them,
which also makes them unable to be flushed by flush_workqueue().

And the users of flush_workqueue() often do not intend to wait for barrier
work items issued by flush_work().  That made the choice sound perfect.

But barrier work items hold a reference to an internal structure
(pool_workqueue), and the worker thread(s) is/are still busy on behalf of
the workqueue user while the barrier work items are not done.  So it is
reasonable to make flush_workqueue() also watch for flush_work() to make
it more robust.
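
In user-visible terms, the intended effect of the series is roughly the
following (an illustrative sketch, not code from this patch):

	queue_work(wq, &my_work);
	flush_work(&my_work);	/* inserts a colored barrier work item */
	flush_workqueue(wq);	/* now also waits for that barrier */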

And a problem[1] reported by Li Zhe shows that we need such robustness.
The warning logs are listed below:

WARNING: CPU: 0 PID: 19336 at kernel/workqueue.c:4430 destroy_workqueue+0x11a/0x2f0
*****
destroy_workqueue: test_workqueue9 has the following busy pwq
  pwq 4: cpus=2 node=0 flags=0x0 nice=0 active=0/1 refcnt=2
      in-flight: 5658:wq_barrier_func
Showing busy workqueues and worker pools:
*****

It shows that even after drain_workqueue() returns, the barrier work item
is still in flight and the pwq (and a worker) is still busy on it.

The problem is caused by flush_workqueue() not watching flush_work():

Thread A				Worker
					/* normal work item with linked */
					process_scheduled_works()
destroy_workqueue()			  process_one_work()
  drain_workqueue()			    /* run normal work item */
				 /--	    pwq_dec_nr_in_flight()
    flush_workqueue()	    <---/
		/* the last normal work item is done */
  sanity_check				  process_one_work()
				       /--  raw_spin_unlock_irq(&pool->lock)
    raw_spin_lock_irq(&pool->lock)  <-/     /* maybe preempt */
    *WARNING*				    wq_barrier_func()
					    /* maybe preempt by cond_resched() */

Thread A can take the pool lock after the Worker unlocks the pool lock
but before wq_barrier_func() runs.  And if any preemption happens around
wq_barrier_func(), destroy_workqueue()'s sanity check is more likely to
get the lock first and catch the problem.  (Note: preemption is not
necessary to cause the bug; the unlocking alone is enough to possibly
trigger the WARNING.)

A simple solution might be to just execute all linked barrier work items
in one go without releasing the pool lock after the head work item's
pwq_dec_nr_in_flight().  But this solution has two problems:

  1) the head work item might itself be a barrier work item when the
     user-queued work item is cancelled. For example:
	thread 1:		thread 2:
	queue_work(wq, &my_work)
	flush_work(&my_work)
				cancel_work_sync(&my_work);
	/* Neither my_work nor the barrier work is scheduled. */
				destroy_workqueue(wq);
	/* This is an easier way to catch the WARNING. */

  2) there might be too many linked barrier work items, and running them
     all in one go without releasing the pool lock just causes trouble.

The only solution is to make flush_workqueue() also watch barrier work
items.  So we have to assign a color to these barrier work items, namely
the color of the head (user-queued) work item.

Assigning a color doesn't cause any problem in active management, because
the previous patch made barrier work items not participate in nr_active
via WORK_STRUCT_INACTIVE rather than relying on the (old) WORK_NO_COLOR.

[1]: https://lore.kernel.org/lkml/20210812083814.32453-1-lizhe.67@bytedance.com/
Reported-by: Li Zhe <lizhe.67@bytedance.com>
Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 kernel/workqueue.c          | 20 ++++++++++++--------
 kernel/workqueue_internal.h |  3 ++-
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1b2792b397f0..803b0868433b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1197,10 +1197,6 @@ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_
 		}
 	}
 
-	/* uncolored work items don't participate in flushing */
-	if (color == WORK_NO_COLOR)
-		goto out_put;
-
 	pwq->nr_in_flight[color]--;
 
 	/* is flush in progress and are we at the flushing tip? */
@@ -1307,7 +1303,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 		 * canceled (see the comments in insert_wq_barrier()).
 		 *
 		 * An inactive work item cannot be grabbed directly because
-		 * it might have linked NO_COLOR work items which, if left
+		 * it might have linked barrier work items which, if left
 		 * on the inactive_works list, will confuse pwq->nr_active
 		 * management later on and cause stall.  Make sure the work
 		 * item is activated before grabbing.
@@ -2235,6 +2231,7 @@ __acquires(&pool->lock)
 	worker->current_func = work->func;
 	worker->current_pwq = pwq;
 	work_data = *work_data_bits(work);
+	worker->current_color = get_work_color(work_data);
 
 	/*
 	 * Record wq name for cmdline and debug reporting, may get
@@ -2340,6 +2337,7 @@ __acquires(&pool->lock)
 	worker->current_work = NULL;
 	worker->current_func = NULL;
 	worker->current_pwq = NULL;
+	worker->current_color = INT_MAX;
 	pwq_dec_nr_in_flight(pwq, work_data);
 }
 
@@ -2683,7 +2681,8 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
 			      struct wq_barrier *barr,
 			      struct work_struct *target, struct worker *worker)
 {
-	unsigned int work_flags = work_color_to_flags(WORK_NO_COLOR);
+	unsigned int work_flags = 0;
+	unsigned int work_color;
 	struct list_head *head;
 
 	/*
@@ -2706,17 +2705,22 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
 	 * If @target is currently being executed, schedule the
 	 * barrier to the worker; otherwise, put it after @target.
 	 */
-	if (worker)
+	if (worker) {
 		head = worker->scheduled.next;
-	else {
+		work_color = worker->current_color;
+	} else {
 		unsigned long *bits = work_data_bits(target);
 
 		head = target->entry.next;
 		/* there can already be other linked works, inherit and set */
 		work_flags |= *bits & WORK_STRUCT_LINKED;
+		work_color = get_work_color(*bits);
 		__set_bit(WORK_STRUCT_LINKED_BIT, bits);
 	}
 
+	pwq->nr_in_flight[work_color]++;
+	work_flags |= work_color_to_flags(work_color);
+
 	debug_work_activate(&barr->work);
 	insert_work(pwq, &barr->work, head, work_flags);
 }
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 498de0e909a4..e00b1204a8e9 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -30,7 +30,8 @@ struct worker {
 
 	struct work_struct	*current_work;	/* L: work being processed */
 	work_func_t		current_func;	/* L: current_work's fn */
-	struct pool_workqueue	*current_pwq; /* L: current_work's pwq */
+	struct pool_workqueue	*current_pwq;	/* L: current_work's pwq */
+	unsigned int		current_color;	/* L: current_work's color */
 	struct list_head	scheduled;	/* L: scheduled works */
 
 	/* 64 bytes boundary on 64bit, 32 on 32bit */
-- 
2.19.1.6.gb485710b



* [PATCH 6/6] workqueue: Remove unused WORK_NO_COLOR
  2021-08-17  1:32   ` [PATCH 0/6] workqueue: Make flush_workqueue() also watch flush_work() Lai Jiangshan
                       ` (4 preceding siblings ...)
  2021-08-17  1:32     ` [PATCH 5/6] workqueue: Assign a color to barrier work items Lai Jiangshan
@ 2021-08-17  1:32     ` Lai Jiangshan
  2021-08-17 17:48     ` [PATCH 0/6] workqueue: Make flush_workqueue() also watch flush_work() Tejun Heo
  6 siblings, 0 replies; 10+ messages in thread
From: Lai Jiangshan @ 2021-08-17  1:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: lizefan.x, lizhe.67, Tejun Heo, Lai Jiangshan, Lai Jiangshan

From: Lai Jiangshan <laijs@linux.alibaba.com>

WORK_NO_COLOR has no users now; just remove it.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 include/linux/workqueue.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 38e479bcdbfe..ba914e67c5ab 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -51,19 +51,14 @@ enum {
 	WORK_STRUCT_STATIC	= 0,
 #endif
 
-	/*
-	 * The last color is no color used for works which don't
-	 * participate in workqueue flushing.
-	 */
-	WORK_NR_COLORS		= (1 << WORK_STRUCT_COLOR_BITS) - 1,
-	WORK_NO_COLOR		= WORK_NR_COLORS,
+	WORK_NR_COLORS		= (1 << WORK_STRUCT_COLOR_BITS),
 
 	/* not bound to any CPU, prefer the local CPU */
 	WORK_CPU_UNBOUND	= NR_CPUS,
 
 	/*
 	 * Reserve 8 bits off of pwq pointer w/ debugobjects turned off.
-	 * This makes pwqs aligned to 256 bytes and allows 15 workqueue
+	 * This makes pwqs aligned to 256 bytes and allows 16 workqueue
 	 * flush colors.
 	 */
 	WORK_STRUCT_FLAG_BITS	= WORK_STRUCT_COLOR_SHIFT +
-- 
2.19.1.6.gb485710b



* Re: [PATCH 0/6] workqueue: Make flush_workqueue() also watch flush_work()
  2021-08-17  1:32   ` [PATCH 0/6] workqueue: Make flush_workqueue() also watch flush_work() Lai Jiangshan
                       ` (5 preceding siblings ...)
  2021-08-17  1:32     ` [PATCH 6/6] workqueue: Remove unused WORK_NO_COLOR Lai Jiangshan
@ 2021-08-17 17:48     ` Tejun Heo
  6 siblings, 0 replies; 10+ messages in thread
From: Tejun Heo @ 2021-08-17 17:48 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: linux-kernel, lizefan.x, lizhe.67, Lai Jiangshan

On Tue, Aug 17, 2021 at 09:32:33AM +0800, Lai Jiangshan wrote:
> Due to historical convenience, we used WORK_NO_COLOR for barrier work items
> queued by flush_work().  The color has two purposes:
> 	Not participating in flushing
> 	Not participating in nr_active
> 
> Only the second purpose is obligatory.  So the plan is to mark barrier
> work items inactive without using WORK_NO_COLOR in patch4 so that we can
> assign a flushing color to them in patch5.

This is great work. Applied to wq/for-5.15.

Thanks.

-- 
tejun

