All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] workqueue: don't skip lockdep wq dependency in cancel_work_sync()
@ 2022-07-28 12:23 Tetsuo Handa
  2022-07-28 16:40 ` Tejun Heo
  2022-07-29  2:38 ` [PATCH] workqueue: don't skip lockdep wq " Lai Jiangshan
  0 siblings, 2 replies; 18+ messages in thread
From: Tetsuo Handa @ 2022-07-28 12:23 UTC (permalink / raw)
  To: Tejun Heo, Lai Jiangshan, Johannes Berg, Hillf Danton; +Cc: LKML

Like Hillf Danton mentioned

  syzbot should have been able to catch cancel_work_sync() in work context
  by checking lockdep_map in __flush_work() for both flush and cancel.

in [1], being unable to report the obvious deadlock scenario shown below is
a regression. From a locking-dependency perspective, the sync version of a
cancel request should behave like a flush request, because it waits for
completion of the work if that work has already started execution.

  ----------
  #include <linux/module.h>
  #include <linux/sched.h>
  static DEFINE_MUTEX(mutex);
  static void work_fn(struct work_struct *work)
  {
    schedule_timeout_uninterruptible(HZ / 5);
    mutex_lock(&mutex);
    mutex_unlock(&mutex);
  }
  static DECLARE_WORK(work, work_fn);
  static int __init test_init(void)
  {
    schedule_work(&work);
    schedule_timeout_uninterruptible(HZ / 10);
    mutex_lock(&mutex);
    cancel_work_sync(&work);
    mutex_unlock(&mutex);
    return -EINVAL;
  }
  module_init(test_init);
  MODULE_LICENSE("GPL");
  ----------

Link: https://lkml.kernel.org/r/20220504044800.4966-1-hdanton@sina.com [1]
Reported-by: Hillf Danton <hdanton@sina.com>
Fixes: d6e89786bed977f3 ("workqueue: skip lockdep wq dependency in cancel_work_sync()")
Cc: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
 kernel/workqueue.c | 45 ++++++++++++++++++---------------------------
 1 file changed, 18 insertions(+), 27 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1ea50f6be843..e6df688f84db 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3000,8 +3000,7 @@ void drain_workqueue(struct workqueue_struct *wq)
 }
 EXPORT_SYMBOL_GPL(drain_workqueue);
 
-static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
-			     bool from_cancel)
+static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
 {
 	struct worker *worker = NULL;
 	struct worker_pool *pool;
@@ -3043,8 +3042,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
 	 * workqueues the deadlock happens when the rescuer stalls, blocking
 	 * forward progress.
 	 */
-	if (!from_cancel &&
-	    (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)) {
+	if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer) {
 		lock_map_acquire(&pwq->wq->lockdep_map);
 		lock_map_release(&pwq->wq->lockdep_map);
 	}
@@ -3056,7 +3054,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
 	return false;
 }
 
-static bool __flush_work(struct work_struct *work, bool from_cancel)
+/**
+ * flush_work - wait for a work to finish executing the last queueing instance
+ * @work: the work to flush
+ *
+ * Wait until @work has finished execution.  @work is guaranteed to be idle
+ * on return if it hasn't been requeued since flush started.
+ *
+ * Return:
+ * %true if flush_work() waited for the work to finish execution,
+ * %false if it was already idle.
+ */
+bool flush_work(struct work_struct *work)
 {
 	struct wq_barrier barr;
 
@@ -3066,12 +3075,10 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
 	if (WARN_ON(!work->func))
 		return false;
 
-	if (!from_cancel) {
-		lock_map_acquire(&work->lockdep_map);
-		lock_map_release(&work->lockdep_map);
-	}
+	lock_map_acquire(&work->lockdep_map);
+	lock_map_release(&work->lockdep_map);
 
-	if (start_flush_work(work, &barr, from_cancel)) {
+	if (start_flush_work(work, &barr)) {
 		wait_for_completion(&barr.done);
 		destroy_work_on_stack(&barr.work);
 		return true;
@@ -3079,22 +3086,6 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
 		return false;
 	}
 }
-
-/**
- * flush_work - wait for a work to finish executing the last queueing instance
- * @work: the work to flush
- *
- * Wait until @work has finished execution.  @work is guaranteed to be idle
- * on return if it hasn't been requeued since flush started.
- *
- * Return:
- * %true if flush_work() waited for the work to finish execution,
- * %false if it was already idle.
- */
-bool flush_work(struct work_struct *work)
-{
-	return __flush_work(work, false);
-}
 EXPORT_SYMBOL_GPL(flush_work);
 
 struct cwt_wait {
@@ -3159,7 +3150,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 	 * isn't executing.
 	 */
 	if (wq_online)
-		__flush_work(work, true);
+		flush_work(work);
 
 	clear_work_data(work);
 
-- 
2.18.4

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] workqueue: don't skip lockdep wq dependency in cancel_work_sync()
  2022-07-28 12:23 [PATCH] workqueue: don't skip lockdep wq dependency in cancel_work_sync() Tetsuo Handa
@ 2022-07-28 16:40 ` Tejun Heo
  2022-07-28 23:18   ` Tetsuo Handa
  2022-07-29  2:38 ` [PATCH] workqueue: don't skip lockdep wq " Lai Jiangshan
  1 sibling, 1 reply; 18+ messages in thread
From: Tejun Heo @ 2022-07-28 16:40 UTC (permalink / raw)
  To: Tetsuo Handa; +Cc: Lai Jiangshan, Johannes Berg, Hillf Danton, LKML

On Thu, Jul 28, 2022 at 09:23:25PM +0900, Tetsuo Handa wrote:
> Like Hillf Danton mentioned
> 
>   syzbot should have been able to catch cancel_work_sync() in work context
>   by checking lockdep_map in __flush_work() for both flush and cancel.
> 
> in [1], being unable to report an obvious deadlock scenario shown below is
> broken. From locking dependency perspective, sync version of cancel request
> should behave as if flush request, for it waits for completion of work if
> that work has already started execution.
> 
>   ----------
>   #include <linux/module.h>
>   #include <linux/sched.h>
>   static DEFINE_MUTEX(mutex);
>   static void work_fn(struct work_struct *work)
>   {
>     schedule_timeout_uninterruptible(HZ / 5);
>     mutex_lock(&mutex);
>     mutex_unlock(&mutex);
>   }
>   static DECLARE_WORK(work, work_fn);
>   static int __init test_init(void)
>   {
>     schedule_work(&work);
>     schedule_timeout_uninterruptible(HZ / 10);
>     mutex_lock(&mutex);
>     cancel_work_sync(&work);
>     mutex_unlock(&mutex);
>     return -EINVAL;
>   }
>   module_init(test_init);
>   MODULE_LICENSE("GPL");
>   ----------
> 
> Link: https://lkml.kernel.org/r/20220504044800.4966-1-hdanton@sina.com [1]
> Reported-by: Hillf Danton <hdanton@sina.com>
> Fixes: d6e89786bed977f3 ("workqueue: skip lockdep wq dependency in cancel_work_sync()")

Tetsuo, you gotta explain why this is okay w.r.t. the spurious warnings that
the above commit addressed. You can't just state that there are cases which
are missed and then revert it.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] workqueue: don't skip lockdep wq dependency in cancel_work_sync()
  2022-07-28 16:40 ` Tejun Heo
@ 2022-07-28 23:18   ` Tetsuo Handa
  2022-07-29  1:49     ` Tetsuo Handa
  0 siblings, 1 reply; 18+ messages in thread
From: Tetsuo Handa @ 2022-07-28 23:18 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Lai Jiangshan, Johannes Berg, Hillf Danton, LKML

On 2022/07/29 1:40, Tejun Heo wrote:
> Tetsuo, you gotta explain why this is okay w.r.t. the spurious warnings that
> the above commit addressed. You can't just state that there are cases which
> are missed and then revert it.

There are four commits related to this change.

  commit 87915adc3f0acdf03c776df42e308e5a155c19af
  Author: Johannes Berg <johannes.berg@intel.com>
  Date:   Wed Aug 22 11:49:04 2018 +0200

      workqueue: re-add lockdep dependencies for flushing

  commit d6e89786bed977f37f55ffca11e563f6d2b1e3b5
  Author: Johannes Berg <johannes.berg@intel.com>
  Date:   Wed Aug 22 11:49:03 2018 +0200

      workqueue: skip lockdep wq dependency in cancel_work_sync()

  commit fd1a5b04dfb899f84ddeb8acdaea6b98283df1e5
  Author: Byungchul Park <byungchul.park@lge.com>
  Date:   Wed Oct 25 17:56:04 2017 +0900

      workqueue: Remove now redundant lock acquisitions wrt. workqueue flushes

  commit 0976dfc1d0cd80a4e9dfaf87bd8744612bde475a
  Author: Stephen Boyd <sboyd@codeaurora.org>
  Date:   Fri Apr 20 17:28:50 2012 -0700

      workqueue: Catch more locking problems with flush_work()

. Commit 87915adc3f0acdf0 ("workqueue: re-add lockdep dependencies for
flushing") saying

    These were removed after cross-release partially caught these
    problems, but now cross-release was reverted anyway. IMHO the
    removal was erroneous anyway though, since lockdep should be
    able to catch potential problems, not just actual ones, and
    cross-release would only have caught the problem when actually
    invoking wait_for_completion().

is the answer, commit 0976dfc1d0cd80a4 ("workqueue: Catch more locking
problems with flush_work()") saying

    Add a lockdep hint by acquiring and releasing the work item
    lockdep_map in flush_work() so that we always catch this
    potential deadlock scenario.

is what this patch restores.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] workqueue: don't skip lockdep wq dependency in cancel_work_sync()
  2022-07-28 23:18   ` Tetsuo Handa
@ 2022-07-29  1:49     ` Tetsuo Handa
  2022-07-29  2:28       ` Tetsuo Handa
  0 siblings, 1 reply; 18+ messages in thread
From: Tetsuo Handa @ 2022-07-29  1:49 UTC (permalink / raw)
  To: Johannes Berg; +Cc: Lai Jiangshan, Hillf Danton, LKML, Tejun Heo

Johannes, why did you think that flagging cancel_work_sync() as if it
were flush_work() was a problem?

Unconditionally recording

  "struct mutex" mutex->lockdep_map => "struct work_struct" work1->lockdep_map
  "struct mutex" mutex->lockdep_map => "struct work_struct" work2->lockdep_map

chains has zero problem.

Unconditionally recording

  "struct mutex" mutex->lockdep_map => "struct workqueue_struct" ordered_wq->lockdep_map

chain when ordered_wq can process only one work item at a time
in order to indicate that the ordered_wq is currently unable to process
other works has zero problem.

The example shown in commit d6e89786bed977f3 ("workqueue: skip lockdep wq
dependency in cancel_work_sync()") is nothing but a violation of the rule that
"Do not hold a lock from a work callback function (do not record

  "struct work_struct" work1->lockdep_map => "struct mutex" mutex->lockdep_map
  "struct workqueue_struct" ordered_wq->lockdep_map => "struct mutex" mutex->lockdep_map

chain) if somebody might wait for completion of that callback function with
that lock held (might record

  "struct mutex" mutex->lockdep_map => "struct work_struct" work1->lockdep_map
  "struct mutex" mutex->lockdep_map => "struct workqueue_struct" ordered_wq->lockdep_map

chain)."

Which in-tree ordered workqueue instance is hitting this problem?


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] workqueue: don't skip lockdep wq dependency in cancel_work_sync()
  2022-07-29  1:49     ` Tetsuo Handa
@ 2022-07-29  2:28       ` Tetsuo Handa
  2022-07-29  2:49         ` Tetsuo Handa
  0 siblings, 1 reply; 18+ messages in thread
From: Tetsuo Handa @ 2022-07-29  2:28 UTC (permalink / raw)
  To: Johannes Berg; +Cc: Lai Jiangshan, Hillf Danton, LKML, Tejun Heo

Thinking more, I might be confused by difference between

  the lockdep "struct work_struct" dependency handling

and

  the lockdep "struct work" dependency handling

. In an example that

      work3_function() { /* nothing */ }
      work4_function() { mutex_lock(&mutex); ... }

      other_function() {
        queue_work(ordered_wq, &work3);
        queue_work(ordered_wq, &work4);
        mutex_lock(&mutex);
        cancel_work_sync(&work4);
      }

, possibility of deadlock must be reported by lockdep via
the lockdep "struct work" dependency handling.

Then, prior to commit d6e89786bed977f3, lockdep flagging

  the lockdep "struct work_struct" dependency handling

as if cancel_work_sync() was flush_work() is a problem, and
lockdep not flagging

  the lockdep "struct work" dependency handling

from cancel_work_sync() as if it were flush_work() is also a problem.

Then, commit d6e89786bed977f3 might be OK, but commit 87915adc3f0acdf0
was wrong in that it preserved lockdep not flagging

  the lockdep "struct work" dependency handling

from cancel_work_sync() as if it were flush_work().


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] workqueue: don't skip lockdep wq dependency in cancel_work_sync()
  2022-07-28 12:23 [PATCH] workqueue: don't skip lockdep wq dependency in cancel_work_sync() Tetsuo Handa
  2022-07-28 16:40 ` Tejun Heo
@ 2022-07-29  2:38 ` Lai Jiangshan
  2022-07-29  2:49   ` Lai Jiangshan
  1 sibling, 1 reply; 18+ messages in thread
From: Lai Jiangshan @ 2022-07-29  2:38 UTC (permalink / raw)
  To: Tetsuo Handa; +Cc: Tejun Heo, Johannes Berg, Hillf Danton, LKML

On Thu, Jul 28, 2022 at 8:23 PM Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
>
> Like Hillf Danton mentioned
>
>   syzbot should have been able to catch cancel_work_sync() in work context
>   by checking lockdep_map in __flush_work() for both flush and cancel.
>
> in [1], being unable to report an obvious deadlock scenario shown below is
> broken. From locking dependency perspective, sync version of cancel request
> should behave as if flush request, for it waits for completion of work if
> that work has already started execution.
>
>   ----------
>   #include <linux/module.h>
>   #include <linux/sched.h>
>   static DEFINE_MUTEX(mutex);
>   static void work_fn(struct work_struct *work)
>   {
>     schedule_timeout_uninterruptible(HZ / 5);
>     mutex_lock(&mutex);
>     mutex_unlock(&mutex);
>   }
>   static DECLARE_WORK(work, work_fn);
>   static int __init test_init(void)
>   {
>     schedule_work(&work);
>     schedule_timeout_uninterruptible(HZ / 10);
>     mutex_lock(&mutex);
>     cancel_work_sync(&work);
>     mutex_unlock(&mutex);
>     return -EINVAL;
>   }
>   module_init(test_init);
>   MODULE_LICENSE("GPL");
>   ----------
>
> Link: https://lkml.kernel.org/r/20220504044800.4966-1-hdanton@sina.com [1]
> Reported-by: Hillf Danton <hdanton@sina.com>
> Fixes: d6e89786bed977f3 ("workqueue: skip lockdep wq dependency in cancel_work_sync()")
> Cc: Johannes Berg <johannes.berg@intel.com>
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> ---
>  kernel/workqueue.c | 45 ++++++++++++++++++---------------------------
>  1 file changed, 18 insertions(+), 27 deletions(-)
>
> diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> index 1ea50f6be843..e6df688f84db 100644
> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -3000,8 +3000,7 @@ void drain_workqueue(struct workqueue_struct *wq)
>  }
>  EXPORT_SYMBOL_GPL(drain_workqueue);
>
> -static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
> -                            bool from_cancel)
> +static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
>  {
>         struct worker *worker = NULL;
>         struct worker_pool *pool;
> @@ -3043,8 +3042,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
>          * workqueues the deadlock happens when the rescuer stalls, blocking
>          * forward progress.
>          */
> -       if (!from_cancel &&
> -           (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)) {
> +       if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer) {
>                 lock_map_acquire(&pwq->wq->lockdep_map);
>                 lock_map_release(&pwq->wq->lockdep_map);
>         }
> @@ -3056,7 +3054,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
>         return false;
>  }
>
> -static bool __flush_work(struct work_struct *work, bool from_cancel)
> +/**
> + * flush_work - wait for a work to finish executing the last queueing instance
> + * @work: the work to flush
> + *
> + * Wait until @work has finished execution.  @work is guaranteed to be idle
> + * on return if it hasn't been requeued since flush started.
> + *
> + * Return:
> + * %true if flush_work() waited for the work to finish execution,
> + * %false if it was already idle.
> + */
> +bool flush_work(struct work_struct *work)
>  {
>         struct wq_barrier barr;
>
> @@ -3066,12 +3075,10 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
>         if (WARN_ON(!work->func))
>                 return false;
>
> -       if (!from_cancel) {
> -               lock_map_acquire(&work->lockdep_map);
> -               lock_map_release(&work->lockdep_map);
> -       }
> +       lock_map_acquire(&work->lockdep_map);
> +       lock_map_release(&work->lockdep_map);


IIUC, I think the change of these 5 lines of code (-3+2) is enough
to fix the problem described in the changelog.

If so, could you make a minimal patch?

I believe what the commit d6e89786bed977f3 ("workqueue: skip lockdep
wq dependency in cancel_work_sync()") fixes is real.  It is not a good
idea to revert it.

P.S.

The commit fd1a5b04dfb8("workqueue: Remove now redundant lock
acquisitions wrt. workqueue flushes") removed this lockdep check.

And the commit 87915adc3f0a("workqueue: re-add lockdep
dependencies for flushing") added it back for non-canceling cases.

It seems that commit fd1a5b04dfb8 is the culprit and 87915adc3f0a
didn't fix all of the problems it introduced.

So it is better to complete 87915adc3f0a by making __flush_work()
do lock_map_acquire(&work->lockdep_map) for both canceling and
non-canceling cases.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] workqueue: don't skip lockdep wq dependency in cancel_work_sync()
  2022-07-29  2:28       ` Tetsuo Handa
@ 2022-07-29  2:49         ` Tetsuo Handa
  2022-07-29  2:54           ` [PATCH v2] workqueue: don't skip lockdep work " Tetsuo Handa
  0 siblings, 1 reply; 18+ messages in thread
From: Tetsuo Handa @ 2022-07-29  2:49 UTC (permalink / raw)
  To: Johannes Berg; +Cc: Lai Jiangshan, Hillf Danton, LKML, Tejun Heo

On 2022/07/29 11:28, Tetsuo Handa wrote:
> Thinking more, I might be confused by difference between
> 
>   the lockdep "struct work_struct" dependency handling

the lockdep "struct workqueue_struct" dependency handling

> 
> and
> 
>   the lockdep "struct work" dependency handling

the lockdep "struct work_struct" dependency handling


On 2022/07/29 11:38, Lai Jiangshan wrote:
> The commit fd1a5b04dfb8("workqueue: Remove now redundant lock
> acquisitions wrt. workqueue flushes") removed this lockdep check.
> 
> And the commit 87915adc3f0a("workqueue: re-add lockdep
> dependencies for flushing") added it back for non-canceling cases.
> 
> It seems the commit fd1a5b04dfb8 is the culprit and 87915adc3f0a
> didn't fixes all the problem of it.
> 
> So it is better to complete 87915adc3f0a by making __flush_work()
> does lock_map_acquire(&work->lockdep_map) for both canceling and
> non-canceling cases.

Yes, commit 87915adc3f0acdf0 was incomplete.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] workqueue: don't skip lockdep wq dependency in cancel_work_sync()
  2022-07-29  2:38 ` [PATCH] workqueue: don't skip lockdep wq " Lai Jiangshan
@ 2022-07-29  2:49   ` Lai Jiangshan
  0 siblings, 0 replies; 18+ messages in thread
From: Lai Jiangshan @ 2022-07-29  2:49 UTC (permalink / raw)
  To: Tetsuo Handa; +Cc: Tejun Heo, Johannes Berg, Hillf Danton, LKML, Byungchul Park

+CC Byungchul Park <byungchul.park@lge.com>

On Fri, Jul 29, 2022 at 10:38 AM Lai Jiangshan <jiangshanlai@gmail.com> wrote:

> > +bool flush_work(struct work_struct *work)
> >  {
> >         struct wq_barrier barr;
> >
> > @@ -3066,12 +3075,10 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
> >         if (WARN_ON(!work->func))
> >                 return false;
> >
> > -       if (!from_cancel) {
> > -               lock_map_acquire(&work->lockdep_map);
> > -               lock_map_release(&work->lockdep_map);
> > -       }
> > +       lock_map_acquire(&work->lockdep_map);
> > +       lock_map_release(&work->lockdep_map);
>
>
> IIUC, I think the change of these 5 lines of code (-3+2) is enough
> to fix the problem described in the changelog.
>
> If so, could you make a minimal patch?
>
> I believe what the commit d6e89786bed977f3 ("workqueue: skip lockdep
> wq dependency in cancel_work_sync()") fixes is real.  It is not a good
> idea to revert it.
>
> P.S.
>
> The commit fd1a5b04dfb8("workqueue: Remove now redundant lock
> acquisitions wrt. workqueue flushes") removed this lockdep check.
>
> And the commit 87915adc3f0a("workqueue: re-add lockdep
> dependencies for flushing") added it back for non-canceling cases.
>
> It seems the commit fd1a5b04dfb8 is the culprit and 87915adc3f0a
> didn't fixes all the problem of it.
>
> So it is better to complete 87915adc3f0a by making __flush_work()
> does lock_map_acquire(&work->lockdep_map) for both canceling and
> non-canceling cases.

The cross-release locking check is reverted by the commit e966eaeeb623
("locking/lockdep: Remove the cross-release locking checks").

So fd1a5b04dfb8 was somewhat hasty. What it removed should be added back.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2] workqueue: don't skip lockdep work dependency in cancel_work_sync()
  2022-07-29  2:49         ` Tetsuo Handa
@ 2022-07-29  2:54           ` Tetsuo Handa
  2022-07-29  4:30             ` [PATCH v3] " Tetsuo Handa
  0 siblings, 1 reply; 18+ messages in thread
From: Tetsuo Handa @ 2022-07-29  2:54 UTC (permalink / raw)
  To: Johannes Berg, Lai Jiangshan, Tejun Heo; +Cc: Hillf Danton, LKML

Like Hillf Danton mentioned

  syzbot should have been able to catch cancel_work_sync() in work context
  by checking lockdep_map in __flush_work() for both flush and cancel.

in [1], being unable to report the obvious deadlock scenario shown below is
a regression. From a locking-dependency perspective, the sync version of a
cancel request should behave like a flush request, because it waits for
completion of the work if that work has already started execution.

  ----------
  #include <linux/module.h>
  #include <linux/sched.h>
  static DEFINE_MUTEX(mutex);
  static void work_fn(struct work_struct *work)
  {
    schedule_timeout_uninterruptible(HZ / 5);
    mutex_lock(&mutex);
    mutex_unlock(&mutex);
  }
  static DECLARE_WORK(work, work_fn);
  static int __init test_init(void)
  {
    schedule_work(&work);
    schedule_timeout_uninterruptible(HZ / 10);
    mutex_lock(&mutex);
    cancel_work_sync(&work);
    mutex_unlock(&mutex);
    return -EINVAL;
  }
  module_init(test_init);
  MODULE_LICENSE("GPL");
  ----------

Link: https://lkml.kernel.org/r/20220504044800.4966-1-hdanton@sina.com [1]
Reported-by: Hillf Danton <hdanton@sina.com>
Fixes: 87915adc3f0acdf0 ("workqueue: re-add lockdep dependencies for flushing")
Cc: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
Changes in v2:
  Check work's dependency and do not check workqueue's dependency.

 kernel/workqueue.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1ea50f6be843..01c5abf7fc61 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3066,10 +3066,8 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
 	if (WARN_ON(!work->func))
 		return false;
 
-	if (!from_cancel) {
-		lock_map_acquire(&work->lockdep_map);
-		lock_map_release(&work->lockdep_map);
-	}
+	lock_map_acquire(&work->lockdep_map);
+	lock_map_release(&work->lockdep_map);
 
 	if (start_flush_work(work, &barr, from_cancel)) {
 		wait_for_completion(&barr.done);
-- 
2.18.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v3] workqueue: don't skip lockdep work dependency in cancel_work_sync()
  2022-07-29  2:54           ` [PATCH v2] workqueue: don't skip lockdep work " Tetsuo Handa
@ 2022-07-29  4:30             ` Tetsuo Handa
  2022-08-15 21:16               ` Tejun Heo
  2023-09-13 14:28               ` Guenter Roeck
  0 siblings, 2 replies; 18+ messages in thread
From: Tetsuo Handa @ 2022-07-29  4:30 UTC (permalink / raw)
  To: Johannes Berg, Lai Jiangshan, Tejun Heo; +Cc: Hillf Danton, LKML

Like Hillf Danton mentioned

  syzbot should have been able to catch cancel_work_sync() in work context
  by checking lockdep_map in __flush_work() for both flush and cancel.

in [1], being unable to report the obvious deadlock scenario shown below is
a regression. From a locking-dependency perspective, the sync version of a
cancel request should behave like a flush request, because it waits for
completion of the work if that work has already started execution.

  ----------
  #include <linux/module.h>
  #include <linux/sched.h>
  static DEFINE_MUTEX(mutex);
  static void work_fn(struct work_struct *work)
  {
    schedule_timeout_uninterruptible(HZ / 5);
    mutex_lock(&mutex);
    mutex_unlock(&mutex);
  }
  static DECLARE_WORK(work, work_fn);
  static int __init test_init(void)
  {
    schedule_work(&work);
    schedule_timeout_uninterruptible(HZ / 10);
    mutex_lock(&mutex);
    cancel_work_sync(&work);
    mutex_unlock(&mutex);
    return -EINVAL;
  }
  module_init(test_init);
  MODULE_LICENSE("GPL");
  ----------

The check this patch restores was added by commit 0976dfc1d0cd80a4
("workqueue: Catch more locking problems with flush_work()").

Then, lockdep's crossrelease feature was added by commit b09be676e0ff25bd
("locking/lockdep: Implement the 'crossrelease' feature"). As a result,
this check was once removed by commit fd1a5b04dfb899f8 ("workqueue: Remove
now redundant lock acquisitions wrt. workqueue flushes").

But lockdep's crossrelease feature was removed by commit e966eaeeb623f099
("locking/lockdep: Remove the cross-release locking checks"). At this
point, this check should have been restored.

Then, commit d6e89786bed977f3 ("workqueue: skip lockdep wq dependency in
cancel_work_sync()") introduced a boolean flag in order to distinguish
flush_work() and cancel_work_sync(), for checking "struct workqueue_struct"
dependency when called from cancel_work_sync() was causing false positives.

Then, commit 87915adc3f0acdf0 ("workqueue: re-add lockdep dependencies for
flushing") tried to restore the "struct work_struct" dependency check, but
erroneously checked this boolean flag. As the example shown above indicates,
the "struct work_struct" dependency needs to be checked for both flush_work()
and cancel_work_sync().

Link: https://lkml.kernel.org/r/20220504044800.4966-1-hdanton@sina.com [1]
Reported-by: Hillf Danton <hdanton@sina.com>
Suggested-by: Lai Jiangshan <jiangshanlai@gmail.com>
Fixes: 87915adc3f0acdf0 ("workqueue: re-add lockdep dependencies for flushing")
Cc: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
Changes in v3:
  Update patch description.

Changes in v2:
  Check work's dependency and do not check workqueue's dependency.

 kernel/workqueue.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1ea50f6be843..01c5abf7fc61 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3066,10 +3066,8 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
 	if (WARN_ON(!work->func))
 		return false;
 
-	if (!from_cancel) {
-		lock_map_acquire(&work->lockdep_map);
-		lock_map_release(&work->lockdep_map);
-	}
+	lock_map_acquire(&work->lockdep_map);
+	lock_map_release(&work->lockdep_map);
 
 	if (start_flush_work(work, &barr, from_cancel)) {
 		wait_for_completion(&barr.done);
-- 
2.18.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] workqueue: don't skip lockdep work dependency in cancel_work_sync()
  2022-07-29  4:30             ` [PATCH v3] " Tetsuo Handa
@ 2022-08-15 21:16               ` Tejun Heo
  2023-09-13 14:28               ` Guenter Roeck
  1 sibling, 0 replies; 18+ messages in thread
From: Tejun Heo @ 2022-08-15 21:16 UTC (permalink / raw)
  To: Tetsuo Handa; +Cc: Johannes Berg, Lai Jiangshan, Hillf Danton, LKML

On Fri, Jul 29, 2022 at 01:30:23PM +0900, Tetsuo Handa wrote:
> Like Hillf Danton mentioned
> 
>   syzbot should have been able to catch cancel_work_sync() in work context
>   by checking lockdep_map in __flush_work() for both flush and cancel.
> 
> in [1], being unable to report an obvious deadlock scenario shown below is
> broken. From locking dependency perspective, sync version of cancel request
> should behave as if flush request, for it waits for completion of work if
> that work has already started execution.

Applied to cgroup/for-6.0-fixes.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] workqueue: don't skip lockdep work dependency in cancel_work_sync()
  2022-07-29  4:30             ` [PATCH v3] " Tetsuo Handa
  2022-08-15 21:16               ` Tejun Heo
@ 2023-09-13 14:28               ` Guenter Roeck
  2023-09-13 14:41                 ` Johannes Berg
  1 sibling, 1 reply; 18+ messages in thread
From: Guenter Roeck @ 2023-09-13 14:28 UTC (permalink / raw)
  To: Tetsuo Handa; +Cc: Johannes Berg, Lai Jiangshan, Tejun Heo, Hillf Danton, LKML

Hi,

On Fri, Jul 29, 2022 at 01:30:23PM +0900, Tetsuo Handa wrote:
> Like Hillf Danton mentioned
> 
>   syzbot should have been able to catch cancel_work_sync() in work context
>   by checking lockdep_map in __flush_work() for both flush and cancel.
> 
> in [1], being unable to report an obvious deadlock scenario shown below is
> broken. From locking dependency perspective, sync version of cancel request
> should behave as if flush request, for it waits for completion of work if
> that work has already started execution.
> 
>   ----------
>   #include <linux/module.h>
>   #include <linux/sched.h>
>   static DEFINE_MUTEX(mutex);
>   static void work_fn(struct work_struct *work)
>   {
>     schedule_timeout_uninterruptible(HZ / 5);
>     mutex_lock(&mutex);
>     mutex_unlock(&mutex);
>   }
>   static DECLARE_WORK(work, work_fn);
>   static int __init test_init(void)
>   {
>     schedule_work(&work);
>     schedule_timeout_uninterruptible(HZ / 10);
>     mutex_lock(&mutex);
>     cancel_work_sync(&work);
>     mutex_unlock(&mutex);
>     return -EINVAL;
>   }
>   module_init(test_init);
>   MODULE_LICENSE("GPL");
>   ----------
> 
> The check this patch restores was added by commit 0976dfc1d0cd80a4
> ("workqueue: Catch more locking problems with flush_work()").
> 
> Then, lockdep's crossrelease feature was added by commit b09be676e0ff25bd
> ("locking/lockdep: Implement the 'crossrelease' feature"). As a result,
> this check was once removed by commit fd1a5b04dfb899f8 ("workqueue: Remove
> now redundant lock acquisitions wrt. workqueue flushes").
> 
> But lockdep's crossrelease feature was removed by commit e966eaeeb623f099
> ("locking/lockdep: Remove the cross-release locking checks"). At this
> point, this check should have been restored.
> 
> Then, commit d6e89786bed977f3 ("workqueue: skip lockdep wq dependency in
> cancel_work_sync()") introduced a boolean flag in order to distinguish
> flush_work() and cancel_work_sync(), for checking "struct workqueue_struct"
> dependency when called from cancel_work_sync() was causing false positives.
> 
> Then, commit 87915adc3f0acdf0 ("workqueue: re-add lockdep dependencies for
> flushing") tried to restore "struct work_struct" dependency check, but by
> error checked this boolean flag. Like an example shown above indicates,
> "struct work_struct" dependency needs to be checked for both flush_work()
> and cancel_work_sync().
> 
> Link: https://lkml.kernel.org/r/20220504044800.4966-1-hdanton@sina.com [1]
> Reported-by: Hillf Danton <hdanton@sina.com>
> Suggested-by: Lai Jiangshan <jiangshanlai@gmail.com>
> Fixes: 87915adc3f0acdf0 ("workqueue: re-add lockdep dependencies for flushing")
> Cc: Johannes Berg <johannes.berg@intel.com>
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> ---

This patch results in the attached lockdep splat when running the
ast2600-evb emulation in qemu with aspeed_g5_defconfig and lock debugging
enabled. Reverting this patch fixes the problem.

Guenter

----
[    9.809960] ======================================================
[    9.810053] WARNING: possible circular locking dependency detected
[    9.810196] 6.6.0-rc1-00004-g6faca50f629f #1 Tainted: G                 N
[    9.810327] ------------------------------------------------------
[    9.810406] ip/357 is trying to acquire lock:
[    9.810501] 83af6c40 ((work_completion)(&(&dev->state_queue)->work)){+.+.}-{0:0}, at: __flush_work+0x40/0x550
[    9.811052] 
[    9.811052] but task is already holding lock:
[    9.811133] 81639924 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x124/0x514
[    9.811264] 
[    9.811264] which lock already depends on the new lock.
[    9.811264] 
[    9.811361] 
[    9.811361] the existing dependency chain (in reverse order) is:
[    9.811466] 
[    9.811466] -> #1 (rtnl_mutex){+.+.}-{3:3}:
[    9.811616]        lock_acquire+0xfc/0x368
[    9.811717]        __mutex_lock+0x90/0xf00
[    9.811782]        mutex_lock_nested+0x24/0x2c
[    9.811845]        ftgmac100_reset+0x1c/0x1dc
[    9.811907]        ftgmac100_adjust_link+0xc0/0x13c
[    9.811972]        phy_link_change+0x30/0x5c
[    9.812035]        phy_check_link_status+0x9c/0x11c
[    9.812100]        phy_state_machine+0x1c0/0x2c0
[    9.812163]        process_one_work+0x23c/0x618
[    9.812225]        worker_thread+0x1e0/0x4a0
[    9.812283]        kthread+0x100/0x124
[    9.812339]        ret_from_fork+0x14/0x28
[    9.812405] 
[    9.812405] -> #0 ((work_completion)(&(&dev->state_queue)->work)){+.+.}-{0:0}:
[    9.812531]        check_prev_add+0x128/0x15ec
[    9.812594]        __lock_acquire+0x16ec/0x20cc
[    9.812656]        lock_acquire+0xfc/0x368
[    9.812712]        __flush_work+0x70/0x550
[    9.812769]        __cancel_work_timer+0x1e4/0x264
[    9.812833]        phy_stop+0x78/0x128
[    9.812889]        ftgmac100_stop+0x5c/0xac
[    9.812949]        __dev_close_many+0xb8/0x140
[    9.813013]        __dev_change_flags+0x100/0x24c
[    9.813077]        dev_change_flags+0x20/0x5c
[    9.813137]        do_setlink+0x28c/0xf0c
[    9.813194]        rtnl_newlink+0x348/0x8b8
[    9.813253]        rtnetlink_rcv_msg+0x154/0x514
[    9.813316]        netlink_rcv_skb+0xbc/0x114
[    9.813378]        netlink_unicast+0x1b4/0x320
[    9.813438]        netlink_sendmsg+0x1fc/0x47c
[    9.813499]        ____sys_sendmsg+0xa8/0x324
[    9.813561]        ___sys_sendmsg+0x70/0x9c
[    9.813620]        sys_sendmsg+0x54/0x90
[    9.813691]        ret_fast_syscall+0x0/0x1c
[    9.813760] 
[    9.813760] other info that might help us debug this:
[    9.813760] 
[    9.813864]  Possible unsafe locking scenario:
[    9.813864] 
[    9.813938]        CPU0                    CPU1
[    9.813999]        ----                    ----
[    9.814062]   lock(rtnl_mutex);
[    9.814139]                                lock((work_completion)(&(&dev->state_queue)->work));
[    9.814258]                                lock(rtnl_mutex);
[    9.814354]   lock((work_completion)(&(&dev->state_queue)->work));
[    9.814451] 
[    9.814451]  *** DEADLOCK ***
[    9.814451] 
[    9.814543] 1 lock held by ip/357:
[    9.814608]  #0: 81639924 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x124/0x514
[    9.814761] 
[    9.814761] stack backtrace:
[    9.814900] CPU: 0 PID: 357 Comm: ip Tainted: G                 N 6.6.0-rc1-00004-g6faca50f629f #1
[    9.815019] Hardware name: Generic DT based system
[    9.815188]  unwind_backtrace from show_stack+0x18/0x1c
[    9.815299]  show_stack from dump_stack_lvl+0x68/0x90
[    9.815374]  dump_stack_lvl from check_noncircular+0x1a4/0x1e0
[    9.815452]  check_noncircular from check_prev_add+0x128/0x15ec
[    9.815529]  check_prev_add from __lock_acquire+0x16ec/0x20cc
[    9.815606]  __lock_acquire from lock_acquire+0xfc/0x368
[    9.815677]  lock_acquire from __flush_work+0x70/0x550
[    9.815746]  __flush_work from __cancel_work_timer+0x1e4/0x264
[    9.815821]  __cancel_work_timer from phy_stop+0x78/0x128
[    9.815896]  phy_stop from ftgmac100_stop+0x5c/0xac
[    9.815968]  ftgmac100_stop from __dev_close_many+0xb8/0x140
[    9.816047]  __dev_close_many from __dev_change_flags+0x100/0x24c
[    9.816127]  __dev_change_flags from dev_change_flags+0x20/0x5c
[    9.816206]  dev_change_flags from do_setlink+0x28c/0xf0c
[    9.816279]  do_setlink from rtnl_newlink+0x348/0x8b8
[    9.816351]  rtnl_newlink from rtnetlink_rcv_msg+0x154/0x514
[    9.816428]  rtnetlink_rcv_msg from netlink_rcv_skb+0xbc/0x114
[    9.816506]  netlink_rcv_skb from netlink_unicast+0x1b4/0x320
[    9.816583]  netlink_unicast from netlink_sendmsg+0x1fc/0x47c
[    9.816659]  netlink_sendmsg from ____sys_sendmsg+0xa8/0x324
[    9.816737]  ____sys_sendmsg from ___sys_sendmsg+0x70/0x9c
[    9.816812]  ___sys_sendmsg from sys_sendmsg+0x54/0x90
[    9.816884]  sys_sendmsg from ret_fast_syscall+0x0/0x1c
[    9.816983] Exception stack(0xe1f89fa8 to 0xe1f89ff0)
[    9.817464] 9fa0:                   00000000 00000000 00000003 7ed098d8 00000000 00000000
[    9.817602] 9fc0: 00000000 00000000 ffffffff 00000128 00000001 00000002 7ed09f47 7ed09f4a
[    9.817724] 9fe0: 7ed09868 7ed09858 76eb0dac 76eb0228

Decoded:

[    9.809960] ======================================================
[    9.810053] WARNING: possible circular locking dependency detected
[    9.810196] 6.6.0-rc1-00004-g6faca50f629f #1 Tainted: G                 N
[    9.810327] ------------------------------------------------------
[    9.810406] ip/357 is trying to acquire lock:
[    9.810501] 83af6c40 ((work_completion)(&(&dev->state_queue)->work)){+.+.}-{0:0}, at: __flush_work (kernel/workqueue.c:3403) 
		-> lock_map_acquire(&work->lockdep_map);
[    9.811052] 
[    9.811052] but task is already holding lock:
[    9.811133] 81639924 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg (net/core/rtnetlink.c:6442) 
[    9.811264] 
[    9.811264] which lock already depends on the new lock.
[    9.811264] 
[    9.811361] 
[    9.811361] the existing dependency chain (in reverse order) is:
[    9.811466] 
[    9.811466] -> #1 (rtnl_mutex){+.+.}-{3:3}:
[    9.811616] lock_acquire (./arch/arm/include/asm/percpu.h:37 kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5755 kernel/locking/lockdep.c:5718) 
[    9.811717] __mutex_lock (./arch/arm/include/asm/jump_label.h:14 ./include/linux/jump_label.h:207 ./include/trace/events/lock.h:95 kernel/locking/mutex.c:605 kernel/locking/mutex.c:747) 
[    9.811782] mutex_lock_nested (kernel/locking/mutex.c:800) 
[    9.811845] ftgmac100_reset (drivers/net/ethernet/faraday/ftgmac100.c:1317) 
		-> rtnl_lock();
[    9.811907] ftgmac100_adjust_link (drivers/net/ethernet/faraday/ftgmac100.c:1419) 
[    9.811972] phy_link_change (drivers/net/phy/phy_device.c:1081) 
[    9.812035] phy_check_link_status (drivers/net/phy/phy.c:77 drivers/net/phy/phy.c:966 drivers/net/phy/phy.c:947) 
[    9.812100] phy_state_machine (drivers/net/phy/phy.c:1020 drivers/net/phy/phy.c:1497) 
[    9.812163] process_one_work (./arch/arm/include/asm/jump_label.h:14 ./include/linux/jump_label.h:207 ./include/trace/events/workqueue.h:108 kernel/workqueue.c:2635) 
		-> lock_map_acquire(&pwq->wq->lockdep_map);
[    9.812225] worker_thread (kernel/workqueue.c:2697 kernel/workqueue.c:2784) 
[    9.812283] kthread (kernel/kthread.c:388) 
[    9.812339] ret_from_fork (arch/arm/kernel/entry-common.S:135) 
[    9.812405] 
[    9.812405] -> #0 ((work_completion)(&(&dev->state_queue)->work)){+.+.}-{0:0}:
[    9.812531] check_prev_add (kernel/locking/lockdep.c:3135) 
[    9.812594] __lock_acquire (kernel/locking/lockdep.c:3254 kernel/locking/lockdep.c:3868 kernel/locking/lockdep.c:5136) 
[    9.812656] lock_acquire (./arch/arm/include/asm/percpu.h:37 kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5755 kernel/locking/lockdep.c:5718) 
		-> lock_map_acquire(&work->lockdep_map);
[    9.812712] __flush_work (kernel/workqueue.c:3404) 
[    9.812769] __cancel_work_timer (kernel/workqueue.c:3494) 
[    9.812833] phy_stop (drivers/net/phy/phy.c:1213 drivers/net/phy/phy.c:1391) 
[    9.812889] ftgmac100_stop (drivers/net/ethernet/faraday/ftgmac100.c:348 drivers/net/ethernet/faraday/ftgmac100.c:1583) 
[    9.812949] __dev_close_many (net/core/dev.c:1536 (discriminator 2)) 
[    9.813013] __dev_change_flags (./include/linux/list.h:124 ./include/linux/list.h:215 ./include/linux/list.h:229 net/core/dev.c:1547 net/core/dev.c:8595) 
[    9.813077] dev_change_flags (net/core/dev.c:8670) 
[    9.813137] do_setlink (net/core/rtnetlink.c:2882) 
[    9.813194] rtnl_newlink (net/core/rtnetlink.c:3671 net/core/rtnetlink.c:3718) 
[    9.813253] rtnetlink_rcv_msg (net/core/rtnetlink.c:6444) 
		-> rtnl_lock();
[    9.813316] netlink_rcv_skb (net/netlink/af_netlink.c:2546) 
[    9.813378] netlink_unicast (net/netlink/af_netlink.c:1343 net/netlink/af_netlink.c:1368) 
[    9.813438] netlink_sendmsg (net/netlink/af_netlink.c:1910) 
[    9.813499] ____sys_sendmsg (net/socket.c:733 net/socket.c:753 net/socket.c:2541) 
[    9.813561] ___sys_sendmsg (net/socket.c:2595) 
[    9.813620] sys_sendmsg (./include/linux/file.h:32 net/socket.c:2626 net/socket.c:2633 net/socket.c:2631) 
[    9.813691] ret_fast_syscall (arch/arm/kernel/entry-common.S:68) 
[    9.813760] 
[    9.813760] other info that might help us debug this:
[    9.813760] 
[    9.813864]  Possible unsafe locking scenario:
[    9.813864] 
[    9.813938]        CPU0                    CPU1
[    9.813999]        ----                    ----
[    9.814062]   lock(rtnl_mutex);
[    9.814139]                                lock((work_completion)(&(&dev->state_queue)->work));
[    9.814258]                                lock(rtnl_mutex);
[    9.814354]   lock((work_completion)(&(&dev->state_queue)->work));
[    9.814451] 
[    9.814451]  *** DEADLOCK ***
[    9.814451] 
[    9.814543] 1 lock held by ip/357:
[    9.814608] #0: 81639924 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg (net/core/rtnetlink.c:6442) 
[    9.814761] 
[    9.814761] stack backtrace:
[    9.814900] CPU: 0 PID: 357 Comm: ip Tainted: G                 N 6.6.0-rc1-00004-g6faca50f629f #1
[    9.815019] Hardware name: Generic DT based system
[    9.815188] unwind_backtrace from show_stack (arch/arm/kernel/traps.c:258) 
[    9.815299] show_stack from dump_stack_lvl (lib/dump_stack.c:107 (discriminator 4)) 
[    9.815374] dump_stack_lvl from check_noncircular (kernel/locking/lockdep.c:2190) 
[    9.815452] check_noncircular from check_prev_add (kernel/locking/lockdep.c:3135) 
[    9.815529] check_prev_add from __lock_acquire (kernel/locking/lockdep.c:3254 kernel/locking/lockdep.c:3868 kernel/locking/lockdep.c:5136) 
[    9.815606] __lock_acquire from lock_acquire (./arch/arm/include/asm/percpu.h:37 kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5755 kernel/locking/lockdep.c:5718) 
[    9.815677] lock_acquire from __flush_work (kernel/workqueue.c:3404) 
[    9.815746] __flush_work from __cancel_work_timer (kernel/workqueue.c:3494) 
[    9.815821] __cancel_work_timer from phy_stop (drivers/net/phy/phy.c:1213 drivers/net/phy/phy.c:1391) 
[    9.815896] phy_stop from ftgmac100_stop (drivers/net/ethernet/faraday/ftgmac100.c:348 drivers/net/ethernet/faraday/ftgmac100.c:1583) 
[    9.815968] ftgmac100_stop from __dev_close_many (net/core/dev.c:1536 (discriminator 2)) 
[    9.816047] __dev_close_many from __dev_change_flags (./include/linux/list.h:124 ./include/linux/list.h:215 ./include/linux/list.h:229 net/core/dev.c:1547 net/core/dev.c:8595) 
[    9.816127] __dev_change_flags from dev_change_flags (net/core/dev.c:8670) 
[    9.816206] dev_change_flags from do_setlink (net/core/rtnetlink.c:2882) 
[    9.816279] do_setlink from rtnl_newlink (net/core/rtnetlink.c:3671 net/core/rtnetlink.c:3718) 
[    9.816351] rtnl_newlink from rtnetlink_rcv_msg (net/core/rtnetlink.c:6444) 
[    9.816428] rtnetlink_rcv_msg from netlink_rcv_skb (net/netlink/af_netlink.c:2546) 
[    9.816506] netlink_rcv_skb from netlink_unicast (net/netlink/af_netlink.c:1343 net/netlink/af_netlink.c:1368) 
[    9.816583] netlink_unicast from netlink_sendmsg (net/netlink/af_netlink.c:1910) 
[    9.816659] netlink_sendmsg from ____sys_sendmsg (net/socket.c:733 net/socket.c:753 net/socket.c:2541) 
[    9.816737] ____sys_sendmsg from ___sys_sendmsg (net/socket.c:2595) 
[    9.816812] ___sys_sendmsg from sys_sendmsg (./include/linux/file.h:32 net/socket.c:2626 net/socket.c:2633 net/socket.c:2631) 
[    9.816884] sys_sendmsg from ret_fast_syscall (arch/arm/kernel/entry-common.S:68) 
[    9.816983] Exception stack(0xe1f89fa8 to 0xe1f89ff0)
[    9.817464] 9fa0:                   00000000 00000000 00000003 7ed098d8 00000000 00000000
[    9.817602] 9fc0: 00000000 00000000 ffffffff 00000128 00000001 00000002 7ed09f47 7ed09f4a
[    9.817724] 9fe0: 7ed09868 7ed09858 76eb0dac 76eb0228

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] workqueue: don't skip lockdep work dependency in cancel_work_sync()
  2023-09-13 14:28               ` Guenter Roeck
@ 2023-09-13 14:41                 ` Johannes Berg
  2023-09-13 15:59                   ` Guenter Roeck
  0 siblings, 1 reply; 18+ messages in thread
From: Johannes Berg @ 2023-09-13 14:41 UTC (permalink / raw)
  To: Guenter Roeck, Tetsuo Handa
  Cc: Lai Jiangshan, Tejun Heo, Hillf Danton, LKML, Heyi Guo, netdev

Hi Guenter,

> This patch results in the attached lockdep splat when running the
> ast2600-evb emulation in qemu with aspeed_g5_defconfig and lock debugging
> enabled. Reverting this patch fixes the problem.

Umm ... That's only true if you think the problem is the lockdep splat,
rather than the actual potential deadlock?!

> [    9.809960] ======================================================
> [    9.810053] WARNING: possible circular locking dependency detected
> [    9.810196] 6.6.0-rc1-00004-g6faca50f629f #1 Tainted: G                 N

I don't have this exact tree, but on 6.6-rc1,

> [    9.810327] ------------------------------------------------------
> [    9.810406] ip/357 is trying to acquire lock:
> [    9.810501] 83af6c40 ((work_completion)(&(&dev->state_queue)->work)){+.+.}-{0:0}, at: __flush_work+0x40/0x550
> [    9.811052] 
> [    9.811052] but task is already holding lock:
> [    9.811133] 81639924 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x124/0x514
> [    9.811264] 
> [    9.811264] which lock already depends on the new lock.
> [    9.811264] 
> [    9.811361] 
> [    9.811361] the existing dependency chain (in reverse order) is:
> [    9.811466] 
> [    9.811466] -> #1 (rtnl_mutex){+.+.}-{3:3}:
> [    9.811616]        lock_acquire+0xfc/0x368
> [    9.811717]        __mutex_lock+0x90/0xf00
> [    9.811782]        mutex_lock_nested+0x24/0x2c
> [    9.811845]        ftgmac100_reset+0x1c/0x1dc


This does indeed take the RTNL:

static void ftgmac100_reset(struct ftgmac100 *priv)
{
        struct net_device *netdev = priv->netdev;
        int err;

        netdev_dbg(netdev, "Resetting NIC...\n");

        /* Lock the world */
        rtnl_lock();

and is called from

> [    9.811907]        ftgmac100_adjust_link+0xc0/0x13c
> [    9.811972]        phy_link_change+0x30/0x5c
> [    9.812035]        phy_check_link_status+0x9c/0x11c
> [    9.812100]        phy_state_machine+0x1c0/0x2c0

this work (phy_state_machine is the function), which

> [    9.812405] -> #0 ((work_completion)(&(&dev->state_queue)->work)){+.+.}-{0:0}:
> [    9.812531]        check_prev_add+0x128/0x15ec
> [    9.812594]        __lock_acquire+0x16ec/0x20cc
> [    9.812656]        lock_acquire+0xfc/0x368
> [    9.812712]        __flush_work+0x70/0x550
> [    9.812769]        __cancel_work_timer+0x1e4/0x264
> [    9.812833]        phy_stop+0x78/0x128

is cancelled by phy_stop() in phy_stop_machine():

void phy_stop_machine(struct phy_device *phydev)
{
        cancel_delayed_work_sync(&phydev->state_queue);

but of course that's called by the driver under RTNL:

> [    9.812889]        ftgmac100_stop+0x5c/0xac
> [    9.812949]        __dev_close_many+0xb8/0x140

(__dev_close_many requires RTNL)


So you have a potential deadlock in this driver. Yes, workqueue items
and RTNL are basically incompatible. Don't do that. Now this bug was
_probably_ added by commit 1baf2e50e48f ("drivers/net/ftgmac100: fix
DHCP potential failure with systemd") which added a call to
ftgmac100_reset() in ftgmac100_adjust_link() which is the thing called
from the PHY state machine in the first place.

Should that be reverted? I don't know ... maybe it can be fixed
differently.


But anyway ... as far as lockdep/workqueue stuff is concerned I'd
definitely call it a win rather than a bug! Yay for making lockdep
useful - it found a deadlock situation for you! :-) No need to blame
lockdep for that :P

johannes

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] workqueue: don't skip lockdep work dependency in cancel_work_sync()
  2023-09-13 14:41                 ` Johannes Berg
@ 2023-09-13 15:59                   ` Guenter Roeck
  2023-09-13 17:25                     ` Florian Fainelli
  2023-09-13 18:11                     ` Johannes Berg
  0 siblings, 2 replies; 18+ messages in thread
From: Guenter Roeck @ 2023-09-13 15:59 UTC (permalink / raw)
  To: Johannes Berg, Tetsuo Handa
  Cc: Lai Jiangshan, Tejun Heo, Hillf Danton, LKML, Heyi Guo, netdev

On 9/13/23 07:41, Johannes Berg wrote:
> Hi Guenter,
> 
>> This patch results in the attached lockdep splat when running the
>> ast2600-evb emulation in qemu with aspeed_g5_defconfig and lock debugging
>> enabled. Reverting this patch fixes the problem.
> 
> Umm ... That's only true if you think the problem is the lockdep splat,
> rather than the actual potential deadlock?!
> 

It was hard for me to say because the workqueue lock doesn't exist
in the first place if lockdep debugging is not enabled.

>> [    9.809960] ======================================================
>> [    9.810053] WARNING: possible circular locking dependency detected
>> [    9.810196] 6.6.0-rc1-00004-g6faca50f629f #1 Tainted: G                 N
> 
> I don't have this exact tree, but on 6.6-rc1,
> 

Meh, I just included a couple of bug fixes not yet available in 6.6-rc1.

>> [    9.810327] ------------------------------------------------------
>> [    9.810406] ip/357 is trying to acquire lock:
>> [    9.810501] 83af6c40 ((work_completion)(&(&dev->state_queue)->work)){+.+.}-{0:0}, at: __flush_work+0x40/0x550
>> [    9.811052]
>> [    9.811052] but task is already holding lock:
>> [    9.811133] 81639924 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x124/0x514
>> [    9.811264]
>> [    9.811264] which lock already depends on the new lock.
>> [    9.811264]
>> [    9.811361]
>> [    9.811361] the existing dependency chain (in reverse order) is:
>> [    9.811466]
>> [    9.811466] -> #1 (rtnl_mutex){+.+.}-{3:3}:
>> [    9.811616]        lock_acquire+0xfc/0x368
>> [    9.811717]        __mutex_lock+0x90/0xf00
>> [    9.811782]        mutex_lock_nested+0x24/0x2c
>> [    9.811845]        ftgmac100_reset+0x1c/0x1dc
> 
> 
> This does indeed take the RTNL:
> 
> static void ftgmac100_reset(struct ftgmac100 *priv)
> {
>          struct net_device *netdev = priv->netdev;
>          int err;
> 
>          netdev_dbg(netdev, "Resetting NIC...\n");
> 
>          /* Lock the world */
>          rtnl_lock();
> 
> and is called from
> 
>> [    9.811907]        ftgmac100_adjust_link+0xc0/0x13c
>> [    9.811972]        phy_link_change+0x30/0x5c
>> [    9.812035]        phy_check_link_status+0x9c/0x11c
>> [    9.812100]        phy_state_machine+0x1c0/0x2c0
> 
> this work (phy_state_machine is the function), which
> 
>> [    9.812405] -> #0 ((work_completion)(&(&dev->state_queue)->work)){+.+.}-{0:0}:
>> [    9.812531]        check_prev_add+0x128/0x15ec
>> [    9.812594]        __lock_acquire+0x16ec/0x20cc
>> [    9.812656]        lock_acquire+0xfc/0x368
>> [    9.812712]        __flush_work+0x70/0x550
>> [    9.812769]        __cancel_work_timer+0x1e4/0x264
>> [    9.812833]        phy_stop+0x78/0x128
> 
> is cancelled by phy_stop() in phy_stop_machine():
> 
> void phy_stop_machine(struct phy_device *phydev)
> {
>          cancel_delayed_work_sync(&phydev->state_queue);
> 
> but of course that's called by the driver under RTNL:
> 
>> [    9.812889]        ftgmac100_stop+0x5c/0xac
>> [    9.812949]        __dev_close_many+0xb8/0x140
> 
> (__dev_close_many requires RTNL)
> 
> 
> So you have a potential deadlock in this driver. Yes, workqueue items
> and RTNL are basically incompatible. Don't do that. Now this bug was
> _probably_ added by commit 1baf2e50e48f ("drivers/net/ftgmac100: fix
> DHCP potential failure with systemd") which added a call to
> ftgmac100_reset() in ftgmac100_adjust_link() which is the thing called
> from the PHY state machine in the first place.
> 
> Should that be reverted? I don't know ... maybe it can be fixed
> differently.
> 
> 
> But anyway ... as far as lockdep/workqueue stuff is concerned I'd
> definitely call it a win rather than a bug! Yay for making lockdep
> useful - it found a deadlock situation for you! :-) No need to blame
> lockdep for that :P
> 

So you are saying that anything running in a workqueue must not
acquire rtnl_lock because cancel_[delayed_]work_sync() may be called
under rtnl_lock.

Fair point, but is that documented somewhere ? If not, how is anyone
supposed to know ? If it is not documented, I might we well argue that
cancel_[delayed_]work_sync() should not be called with rtnl_lock held
because some worker might hold that lock.

FWIW, it would be nice if the lockdep code would generate some other
message in this situation. Complaining about a deadlock involving a
lock that doesn't exist if lock debugging isn't enabled is not really
helpful and, yes, may result in reporters to falsely assume that this
lock is responsible for the potential deadlock.

Reverting 1baf2e50e48f does fix the problem as well.

Thanks,
Guenter


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] workqueue: don't skip lockdep work dependency in cancel_work_sync()
  2023-09-13 15:59                   ` Guenter Roeck
@ 2023-09-13 17:25                     ` Florian Fainelli
  2023-09-13 17:45                       ` Guenter Roeck
  2023-09-13 18:30                       ` Johannes Berg
  2023-09-13 18:11                     ` Johannes Berg
  1 sibling, 2 replies; 18+ messages in thread
From: Florian Fainelli @ 2023-09-13 17:25 UTC (permalink / raw)
  To: Guenter Roeck, Johannes Berg, Tetsuo Handa, Heyi Guo
  Cc: Lai Jiangshan, Tejun Heo, Hillf Danton, LKML, Heyi Guo, netdev

[-- Attachment #1: Type: text/plain, Size: 5415 bytes --]

On 9/13/23 08:59, Guenter Roeck wrote:
> On 9/13/23 07:41, Johannes Berg wrote:
>> Hi Guenter,
>>
>>> This patch results in the attached lockdep splat when running the
>>> ast2600-evb emulation in qemu with aspeed_g5_defconfig and lock 
>>> debugging
>>> enabled. Reverting this patch fixes the problem.
>>
>> Umm ... That's only true if you think the problem is the lockdep splat,
>> rather than the actual potential deadlock?!
>>
> 
> It was hard for me to say because the workqueue lock doesn't exist
> in the first place if lockdep debugging is not enabled.
> 
>>> [    9.809960] ======================================================
>>> [    9.810053] WARNING: possible circular locking dependency detected
>>> [    9.810196] 6.6.0-rc1-00004-g6faca50f629f #1 Tainted: 
>>> G                 N
>>
>> I don't have this exact tree, but on 6.6-rc1,
>>
> 
> Meh, I just included a couple of bug fixes not yet available in 6.6-rc1.
> 
>>> [    9.810327] ------------------------------------------------------
>>> [    9.810406] ip/357 is trying to acquire lock:
>>> [    9.810501] 83af6c40 
>>> ((work_completion)(&(&dev->state_queue)->work)){+.+.}-{0:0}, at: 
>>> __flush_work+0x40/0x550
>>> [    9.811052]
>>> [    9.811052] but task is already holding lock:
>>> [    9.811133] 81639924 (rtnl_mutex){+.+.}-{3:3}, at: 
>>> rtnetlink_rcv_msg+0x124/0x514
>>> [    9.811264]
>>> [    9.811264] which lock already depends on the new lock.
>>> [    9.811264]
>>> [    9.811361]
>>> [    9.811361] the existing dependency chain (in reverse order) is:
>>> [    9.811466]
>>> [    9.811466] -> #1 (rtnl_mutex){+.+.}-{3:3}:
>>> [    9.811616]        lock_acquire+0xfc/0x368
>>> [    9.811717]        __mutex_lock+0x90/0xf00
>>> [    9.811782]        mutex_lock_nested+0x24/0x2c
>>> [    9.811845]        ftgmac100_reset+0x1c/0x1dc
>>
>>
>> This does indeed take the RTNL:
>>
>> static void ftgmac100_reset(struct ftgmac100 *priv)
>> {
>>          struct net_device *netdev = priv->netdev;
>>          int err;
>>
>>          netdev_dbg(netdev, "Resetting NIC...\n");
>>
>>          /* Lock the world */
>>          rtnl_lock();
>>
>> and is called from
>>
>>> [    9.811907]        ftgmac100_adjust_link+0xc0/0x13c
>>> [    9.811972]        phy_link_change+0x30/0x5c
>>> [    9.812035]        phy_check_link_status+0x9c/0x11c
>>> [    9.812100]        phy_state_machine+0x1c0/0x2c0
>>
>> this work (phy_state_machine is the function), which
>>
>>> [    9.812405] -> #0 
>>> ((work_completion)(&(&dev->state_queue)->work)){+.+.}-{0:0}:
>>> [    9.812531]        check_prev_add+0x128/0x15ec
>>> [    9.812594]        __lock_acquire+0x16ec/0x20cc
>>> [    9.812656]        lock_acquire+0xfc/0x368
>>> [    9.812712]        __flush_work+0x70/0x550
>>> [    9.812769]        __cancel_work_timer+0x1e4/0x264
>>> [    9.812833]        phy_stop+0x78/0x128
>>
>> is cancelled by phy_stop() in phy_stop_machine():
>>
>> void phy_stop_machine(struct phy_device *phydev)
>> {
>>          cancel_delayed_work_sync(&phydev->state_queue);
>>
>> but of course that's called by the driver under RTNL:
>>
>>> [    9.812889]        ftgmac100_stop+0x5c/0xac
>>> [    9.812949]        __dev_close_many+0xb8/0x140
>>
>> (__dev_close_many requires RTNL)
>>
>>
>> So you have a potential deadlock in this driver. Yes, workqueue items
>> and RTNL are basically incompatible. Don't do that. Now this bug was
>> _probably_ added by commit 1baf2e50e48f ("drivers/net/ftgmac100: fix
>> DHCP potential failure with systemd") which added a call to
>> ftgmac100_reset() in ftgmac100_adjust_link() which is the thing called
>> from the PHY state machine in the first place.
>>
>> Should that be reverted? I don't know ... maybe it can be fixed
>> differently.
>>
>>
>> But anyway ... as far as lockdep/workqueue stuff is concerned I'd
>> definitely call it a win rather than a bug! Yay for making lockdep
>> useful - it found a deadlock situation for you! :-) No need to blame
>> lockdep for that :P
>>
> 
> So you are saying that anything running in a workqueue must not
> acquire rtnl_lock because cancel_[delayed_]work_sync() may be called
> under rtnl_lock.
> 
> Fair point, but is that documented somewhere ? If not, how is anyone
> supposed to know ? If it is not documented, I might we well argue that
> cancel_[delayed_]work_sync() should not be called with rtnl_lock held
> because some worker might hold that lock.
> 
> FWIW, it would be nice if the lockdep code would generate some other
> message in this situation. Complaining about a deadlock involving a
> lock that doesn't exist if lock debugging isn't enabled is not really
> helpful and, yes, may result in reporters to falsely assume that this
> lock is responsible for the potential deadlock.
> 
> Reverting 1baf2e50e48f does fix the problem as well.

I would refrain from reverting without giving a fighting chance to the 
author to address it. It seems a bit strange that we do this locking 
dance while it might have been simpler to introduce a 
ftgmac100_reset_unlocked() and ftgmac100_reset() and call both at the 
intended places, something like the completely untested patch attached 
maybe?
-- 
Florian

[-- Attachment #2: 1.patch --]
[-- Type: text/x-patch, Size: 1905 bytes --]

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index a03879a27b04..253bf6d66280 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1299,24 +1299,17 @@ static int ftgmac100_init_all(struct ftgmac100 *priv, bool ignore_alloc_err)
 	return err;
 }
 
-static void ftgmac100_reset(struct ftgmac100 *priv)
+static void ftgmac100_reset_unlocked(struct ftgmac100 *priv)
 {
 	struct net_device *netdev = priv->netdev;
 	int err;
 
 	netdev_dbg(netdev, "Resetting NIC...\n");
 
-	/* Lock the world */
-	rtnl_lock();
-	if (netdev->phydev)
-		mutex_lock(&netdev->phydev->lock);
-	if (priv->mii_bus)
-		mutex_lock(&priv->mii_bus->mdio_lock);
-
 
 	/* Check if the interface is still up */
 	if (!netif_running(netdev))
-		goto bail;
+		return;
 
 	/* Stop the network stack */
 	netif_trans_update(netdev);
@@ -1338,7 +1331,19 @@ static void ftgmac100_reset(struct ftgmac100 *priv)
 	ftgmac100_init_all(priv, true);
 
 	netdev_dbg(netdev, "Reset done !\n");
- bail:
+}
+
+static void ftgmac100_reset(struct ftgmac100 *priv)
+{
+	struct net_device *netdev = priv->netdev;
+
+	rtnl_lock();
+	/* Lock the world */
+	if (netdev->phydev)
+		mutex_lock(&netdev->phydev->lock);
+	if (priv->mii_bus)
+		mutex_lock(&priv->mii_bus->mdio_lock);
+	ftgmac100_reset_unlocked(priv);
 	if (priv->mii_bus)
 		mutex_unlock(&priv->mii_bus->mdio_lock);
 	if (netdev->phydev)
@@ -1405,14 +1410,7 @@ static void ftgmac100_adjust_link(struct net_device *netdev)
 	/* Release phy lock to allow ftgmac100_reset to aquire it, keeping lock
 	 * order consistent to prevent dead lock.
 	 */
-	if (netdev->phydev)
-		mutex_unlock(&netdev->phydev->lock);
-
-	ftgmac100_reset(priv);
-
-	if (netdev->phydev)
-		mutex_lock(&netdev->phydev->lock);
-
+	ftgmac100_reset_unlocked(priv);
 }
 
 static int ftgmac100_mii_probe(struct net_device *netdev)

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] workqueue: don't skip lockdep work dependency in cancel_work_sync()
  2023-09-13 17:25                     ` Florian Fainelli
@ 2023-09-13 17:45                       ` Guenter Roeck
  2023-09-13 18:30                       ` Johannes Berg
  1 sibling, 0 replies; 18+ messages in thread
From: Guenter Roeck @ 2023-09-13 17:45 UTC (permalink / raw)
  To: Florian Fainelli, Johannes Berg, Tetsuo Handa, Heyi Guo
  Cc: Lai Jiangshan, Tejun Heo, Hillf Danton, LKML, netdev

On 9/13/23 10:25, Florian Fainelli wrote:
> On 9/13/23 08:59, Guenter Roeck wrote:
>> On 9/13/23 07:41, Johannes Berg wrote:
>>> Hi Guenter,
>>>
>>>> This patch results in the attached lockdep splat when running the
>>>> ast2600-evb emulation in qemu with aspeed_g5_defconfig and lock debugging
>>>> enabled. Reverting this patch fixes the problem.
>>>
>>> Umm ... That's only true if you think the problem is the lockdep splat,
>>> rather than the actual potential deadlock?!
>>>
>>
>> It was hard for me to say because the workqueue lock doesn't exist
>> in the first place if lockdep debugging is not enabled.
>>
>>>> [    9.809960] ======================================================
>>>> [    9.810053] WARNING: possible circular locking dependency detected
>>>> [    9.810196] 6.6.0-rc1-00004-g6faca50f629f #1 Tainted: G                 N
>>>
>>> I don't have this exact tree, but on 6.6-rc1,
>>>
>>
>> Meh, I just included a couple of bug fixes not yet available in 6.6-rc1.
>>
>>>> [    9.810327] ------------------------------------------------------
>>>> [    9.810406] ip/357 is trying to acquire lock:
>>>> [    9.810501] 83af6c40 ((work_completion)(&(&dev->state_queue)->work)){+.+.}-{0:0}, at: __flush_work+0x40/0x550
>>>> [    9.811052]
>>>> [    9.811052] but task is already holding lock:
>>>> [    9.811133] 81639924 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x124/0x514
>>>> [    9.811264]
>>>> [    9.811264] which lock already depends on the new lock.
>>>> [    9.811264]
>>>> [    9.811361]
>>>> [    9.811361] the existing dependency chain (in reverse order) is:
>>>> [    9.811466]
>>>> [    9.811466] -> #1 (rtnl_mutex){+.+.}-{3:3}:
>>>> [    9.811616]        lock_acquire+0xfc/0x368
>>>> [    9.811717]        __mutex_lock+0x90/0xf00
>>>> [    9.811782]        mutex_lock_nested+0x24/0x2c
>>>> [    9.811845]        ftgmac100_reset+0x1c/0x1dc
>>>
>>>
>>> This does indeed take the RTNL:
>>>
>>> static void ftgmac100_reset(struct ftgmac100 *priv)
>>> {
>>>          struct net_device *netdev = priv->netdev;
>>>          int err;
>>>
>>>          netdev_dbg(netdev, "Resetting NIC...\n");
>>>
>>>          /* Lock the world */
>>>          rtnl_lock();
>>>
>>> and is called from
>>>
>>>> [    9.811907]        ftgmac100_adjust_link+0xc0/0x13c
>>>> [    9.811972]        phy_link_change+0x30/0x5c
>>>> [    9.812035]        phy_check_link_status+0x9c/0x11c
>>>> [    9.812100]        phy_state_machine+0x1c0/0x2c0
>>>
>>> this work (phy_state_machine is the function), which
>>>
>>>> [    9.812405] -> #0 ((work_completion)(&(&dev->state_queue)->work)){+.+.}-{0:0}:
>>>> [    9.812531]        check_prev_add+0x128/0x15ec
>>>> [    9.812594]        __lock_acquire+0x16ec/0x20cc
>>>> [    9.812656]        lock_acquire+0xfc/0x368
>>>> [    9.812712]        __flush_work+0x70/0x550
>>>> [    9.812769]        __cancel_work_timer+0x1e4/0x264
>>>> [    9.812833]        phy_stop+0x78/0x128
>>>
>>> is cancelled by phy_stop() in phy_stop_machine():
>>>
>>> void phy_stop_machine(struct phy_device *phydev)
>>> {
>>>          cancel_delayed_work_sync(&phydev->state_queue);
>>>
>>> but of course that's called by the driver under RTNL:
>>>
>>>> [    9.812889]        ftgmac100_stop+0x5c/0xac
>>>> [    9.812949]        __dev_close_many+0xb8/0x140
>>>
>>> (__dev_close_many requires RTNL)
>>>
>>>
>>> So you have a potential deadlock in this driver. Yes, workqueue items
>>> and RTNL are basically incompatible. Don't do that. Now this bug was
>>> _probably_ added by commit 1baf2e50e48f ("drivers/net/ftgmac100: fix
>>> DHCP potential failure with systemd") which added a call to
>>> ftgmac100_reset() in ftgmac100_adjust_link() which is the thing called
>>> from the PHY state machine in the first place.
>>>
>>> Should that be reverted? I don't know ... maybe it can be fixed
>>> differently.
>>>
>>>
>>> But anyway ... as far as lockdep/workqueue stuff is concerned I'd
>>> definitely call it a win rather than a bug! Yay for making lockdep
>>> useful - it found a deadlock situation for you! :-) No need to blame
>>> lockdep for that :P
>>>
>>
>> So you are saying that anything running in a workqueue must not
>> acquire rtnl_lock because cancel_[delayed_]work_sync() may be called
>> under rtnl_lock.
>>
>> Fair point, but is that documented somewhere ? If not, how is anyone
>> supposed to know ? If it is not documented, I might as well argue that
>> cancel_[delayed_]work_sync() should not be called with rtnl_lock held
>> because some worker might hold that lock.
>>
>> FWIW, it would be nice if the lockdep code would generate some other
>> message in this situation. Complaining about a deadlock involving a
>> lock that doesn't exist if lock debugging isn't enabled is not really
>> helpful and, yes, may result in reporters falsely assuming that this
>> lock is responsible for the potential deadlock.
>>
>> Reverting 1baf2e50e48f does fix the problem as well.
> 
> I would refrain from reverting without giving a fighting chance to the author to address it. It seems a bit strange that we do this locking dance while it might have been simpler to introduce a ftgmac100_reset_unlocked() and ftgmac100_reset() and call both at the intended places, something like the completely untested patch attached maybe?


I agree. As it turns out, there are lots of "workqueue items" in the
kernel calling rtnl_lock(), and at least some of them are canceled
with cancel_delayed_work_sync(). So there has to be some additional
qualifying factor, such as that it is safe to acquire rtnl_lock() in a
worker as long as its cancel function is not called with the lock held.

I don't know if the attached patch would work because I don't know the
impact of executing ftgmac100_reset() without holding the locks.
Also, after all, the problem isn't the lock itself, but the fact that
cancel_delayed_work_sync() may be called on this specific worker with
rtnl_lock held.

Guenter


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] workqueue: don't skip lockdep work dependency in cancel_work_sync()
  2023-09-13 15:59                   ` Guenter Roeck
  2023-09-13 17:25                     ` Florian Fainelli
@ 2023-09-13 18:11                     ` Johannes Berg
  1 sibling, 0 replies; 18+ messages in thread
From: Johannes Berg @ 2023-09-13 18:11 UTC (permalink / raw)
  To: Guenter Roeck, Tetsuo Handa
  Cc: Lai Jiangshan, Tejun Heo, Hillf Danton, LKML, Heyi Guo, netdev

On Wed, 2023-09-13 at 08:59 -0700, Guenter Roeck wrote:
> 
> So you are saying that anything running in a workqueue must not
> acquire rtnl_lock because cancel_[delayed_]work_sync() may be called
> under rtnl_lock.

No no, sorry if I wasn't clear. I mean this particular function / work
struct cannot acquire the RTNL because the cancel _for it_ is called
under RTNL.

It used to be that this was also tied to the entire workqueue, but this
is no longer true due to the way workqueues work these days.


> FWIW, it would be nice if the lockdep code would generate some other
> message in this situation. Complaining about a deadlock involving a
> lock that doesn't exist if lock debugging isn't enabled is not really
> helpful and, yes, may result in reporters falsely assuming that this
> lock is responsible for the potential deadlock.

Well, I don't know of any way to tell lockdep that, but I guess ideas
welcome? I mean, I'm not even sure what else it would tell you, other
than that you have a deadlock?

I mean, OK, I guess it's fair - it says "acquire lock" when it says

[    9.810406] ip/357 is trying to acquire lock:
[    9.810501] 83af6c40 ((work_completion)(&(&dev->state_queue)->work)){+.+.}-{0:0}, at: __flush_work+0x40/0x550

and it's not really a lock, but I'm not even sure how to phrase it
better? Note the scenario may be more complex than here.

I mean, perhaps we could add an optional message somehow and it could
say

"ip/357 is waiting for the work:"

but then we'd also have to update the scenario message to something like

[    9.813938]        CPU0                    CPU1
[    9.813999]        ----                    ----
[    9.814062]   lock(rtnl_mutex);
[    9.814139]                                run((work_completion)(&(&dev->state_queue)->work));
[    9.814258]                                lock(rtnl_mutex);
[    9.814354]   wait((work_completion)(&(&dev->state_queue)->work));


which is really hard to do because how should lockdep know that the two
ways of "acquiring the lock" are actually different, and which one is
which? I'm not even convinced it could really do that.

In any case, I'd rather have a bug report from this than not, even if
it's not trivial to read.


... and here I thought we went through all of this 15+ years ago when I
added it in commit 4e6045f13478 ("workqueue: debug flushing deadlocks
with lockdep"), at which time the situation was actually worse because
you had to not only pay attention to the work struct, but also the
entire workqueue - which is today only true for ordered workqueues... Oh
well :)

johannes

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] workqueue: don't skip lockdep work dependency in cancel_work_sync()
  2023-09-13 17:25                     ` Florian Fainelli
  2023-09-13 17:45                       ` Guenter Roeck
@ 2023-09-13 18:30                       ` Johannes Berg
  1 sibling, 0 replies; 18+ messages in thread
From: Johannes Berg @ 2023-09-13 18:30 UTC (permalink / raw)
  To: Florian Fainelli, Guenter Roeck, Tetsuo Handa, Heyi Guo
  Cc: Lai Jiangshan, Tejun Heo, Hillf Danton, LKML, netdev

On Wed, 2023-09-13 at 10:25 -0700, Florian Fainelli wrote:
> 
> I would refrain from reverting without giving a fighting chance to the 
> author to address it. It seems a bit strange that we do this locking 
> dance while it might have been simpler to introduce a 
> ftgmac100_reset_unlocked() and ftgmac100_reset() and call both at the 
> intended places, something like the completely untested patch attached 
> maybe?

Not sure that's right - it probably wants RTNL for some reason, but with
this patch you don't hold it when coming from ftgmac100_adjust_link()?
(If it did, it'd have deadlocked getting here after all, since it
acquires it)

Not sure why it needs RTNL though, that doesn't seem so bad, and holds
some internal locks. Or maybe it doesn't really, only if there's a
phydev and/or mii_bus, so maybe it needs a driver lock? Well, there's a
note about the reset task, that might be it?

static int ftgmac100_stop(struct net_device *netdev)
{
        struct ftgmac100 *priv = netdev_priv(netdev);

        /* Note about the reset task: We are called with the rtnl lock
         * held, so we are synchronized against the core of the reset
         * task. We must not try to synchronously cancel it otherwise
         * we can deadlock. But since it will test for netif_running()
         * which has already been cleared by the net core, we don't
         * anything special to do.
         */



But it really kind of feels the locking model in this driver is a bit
off.

johannes

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2023-09-13 18:30 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-28 12:23 [PATCH] workqueue: don't skip lockdep wq dependency in cancel_work_sync() Tetsuo Handa
2022-07-28 16:40 ` Tejun Heo
2022-07-28 23:18   ` Tetsuo Handa
2022-07-29  1:49     ` Tetsuo Handa
2022-07-29  2:28       ` Tetsuo Handa
2022-07-29  2:49         ` Tetsuo Handa
2022-07-29  2:54           ` [PATCH v2] workqueue: don't skip lockdep work " Tetsuo Handa
2022-07-29  4:30             ` [PATCH v3] " Tetsuo Handa
2022-08-15 21:16               ` Tejun Heo
2023-09-13 14:28               ` Guenter Roeck
2023-09-13 14:41                 ` Johannes Berg
2023-09-13 15:59                   ` Guenter Roeck
2023-09-13 17:25                     ` Florian Fainelli
2023-09-13 17:45                       ` Guenter Roeck
2023-09-13 18:30                       ` Johannes Berg
2023-09-13 18:11                     ` Johannes Berg
2022-07-29  2:38 ` [PATCH] workqueue: don't skip lockdep wq " Lai Jiangshan
2022-07-29  2:49   ` Lai Jiangshan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.