linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: lizhe.67@bytedance.com
To: tj@kernel.org, jiangshanlai@gmail.com
Cc: linux-kernel@vger.kernel.org, lizefan.x@bytedance.com,
	Li Zhe <lizhe.67@bytedance.com>
Subject: [PATCH] workqueue: fix memory leak of struct pool_workqueue in destroy_workqueue()
Date: Thu, 12 Aug 2021 16:38:14 +0800	[thread overview]
Message-ID: <20210812083814.32453-1-lizhe.67@bytedance.com> (raw)

From: Li Zhe <lizhe.67@bytedance.com>

Even after def98c84b6cd, we may encount sanity check failures in
destroy_workqueue() although we call flush_work() before, which
result in memory leak of struct pool_workqueue.

The warning logs are listed below.

WARNING: CPU: 0 PID: 19336 at kernel/workqueue.c:4430 destroy_workqueue+0x11a/0x2f0
*****
destroy_workqueue: test_workqueue9 has the following busy pwq
  pwq 4: cpus=2 node=0 flags=0x0 nice=0 active=0/1 refcnt=2
      in-flight: 5658:wq_barrier_func
Showing busy workqueues and worker pools:
*****

The possible stack which result in the failure is listed below.

thread A:
destroy_workqueue()
----raw_spin_lock_irq(&pwq->pool->lock)
----pwq_busy()

thread B:
----process_one_work()
----raw_spin_unlock_irq(&pool->lock)
----worker->current_func(work)
----cond_resched()
----raw_spin_lock_irq(&pool->lock)
----pwq_dec_nr_in_flight(pwq, work_color)

Thread A may get pool->lock before thread B, with the pwq->refcnt
is still 2, which result in memory leak and sanity check failures.

Notice that wq_barrier_func() only calls complete(), and it is not
suitable to expand struct work_struct considering of the memory cost,
this patch put complete() after obtaining pool->lock in function
process_one_work() to eliminate competition by identify the work as a
barrier with the work->func equal to NULL.

Signed-off-by: Li Zhe <lizhe.67@bytedance.com>
---
 kernel/workqueue.c | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f148eacda55a..02f77f35522c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -280,6 +280,12 @@ struct workqueue_struct {
 	struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */
 };
 
+struct wq_barrier {
+	struct work_struct	work;
+	struct completion	done;
+	struct task_struct	*task;	/* purely informational */
+};
+
 static struct kmem_cache *pwq_cache;
 
 static cpumask_var_t *wq_numa_possible_cpumask;
@@ -2152,6 +2158,11 @@ static bool manage_workers(struct worker *worker)
 	return true;
 }
 
+static inline bool is_barrier_func(work_func_t func)
+{
+	return func == NULL;
+}
+
 /**
  * process_one_work - process single work
  * @worker: self
@@ -2273,7 +2284,8 @@ __acquires(&pool->lock)
 	 */
 	lockdep_invariant_state(true);
 	trace_workqueue_execute_start(work);
-	worker->current_func(work);
+	if (likely(!is_barrier_func(worker->current_func)))
+		worker->current_func(work);
 	/*
 	 * While we must be careful to not use "work" after this, the trace
 	 * point will only record its address.
@@ -2303,6 +2315,11 @@ __acquires(&pool->lock)
 
 	raw_spin_lock_irq(&pool->lock);
 
+	if (unlikely(is_barrier_func(worker->current_func))) {
+		struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
+		complete(&barr->done);
+	}
+
 	/* clear cpu intensive status */
 	if (unlikely(cpu_intensive))
 		worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
@@ -2618,18 +2635,6 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
 		  target_wq->name, target_func);
 }
 
-struct wq_barrier {
-	struct work_struct	work;
-	struct completion	done;
-	struct task_struct	*task;	/* purely informational */
-};
-
-static void wq_barrier_func(struct work_struct *work)
-{
-	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
-	complete(&barr->done);
-}
-
 /**
  * insert_wq_barrier - insert a barrier work
  * @pwq: pwq to insert barrier into
@@ -2667,7 +2672,11 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
 	 * checks and call back into the fixup functions where we
 	 * might deadlock.
 	 */
-	INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
+	/* no need to init func because complete() has been moved to
+	 * proccess_one_work(), which means that we use NULL to identify
+	 * if this work is a barrier
+	 */
+	INIT_WORK_ONSTACK(&barr->work, NULL);
 	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
 
 	init_completion_map(&barr->done, &target->lockdep_map);
@@ -4682,7 +4691,7 @@ static void pr_cont_pool_info(struct worker_pool *pool)
 
 static void pr_cont_work(bool comma, struct work_struct *work)
 {
-	if (work->func == wq_barrier_func) {
+	if (is_barrier_func(work->func)) {
 		struct wq_barrier *barr;
 
 		barr = container_of(work, struct wq_barrier, work);
-- 
2.11.0


             reply	other threads:[~2021-08-12  8:38 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-12  8:38 lizhe.67 [this message]
2021-08-13 12:03 ` [PATCH] workqueue: fix memory leak of struct pool_workqueue in destroy_workqueue() Lai Jiangshan
2021-08-17  1:32   ` [PATCH 0/6] workqueue: Make flush_workqueue() also watch flush_work() Lai Jiangshan
2021-08-17  1:32     ` [PATCH 1/6] workqueue: Rename "delayed" (delayed by active management) to "inactive" Lai Jiangshan
2021-08-17  1:32     ` [PATCH 2/6] workqueue: Change arguement of pwq_dec_nr_in_flight() Lai Jiangshan
2021-08-17  1:32     ` [PATCH 3/6] workqueue: Change the code of calculating work_flags in insert_wq_barrier() Lai Jiangshan
2021-08-17  1:32     ` [PATCH 4/6] workqueue: Mark barrier work with WORK_STRUCT_INACTIVE Lai Jiangshan
2021-08-17  1:32     ` [PATCH 5/6] workqueue: Assign a color to barrier work items Lai Jiangshan
2021-08-17  1:32     ` [PATCH 6/6] workqueue: Remove unused WORK_NO_COLOR Lai Jiangshan
2021-08-17 17:48     ` [PATCH 0/6] workqueue: Make flush_workqueue() also watch flush_work() Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210812083814.32453-1-lizhe.67@bytedance.com \
    --to=lizhe.67@bytedance.com \
    --cc=jiangshanlai@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizefan.x@bytedance.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).