* [PATCH RFC] nvmet-rdma: use a private workqueue for delete
@ 2018-09-27 18:00 Sagi Grimberg
  2018-09-28 22:14 ` Bart Van Assche
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Sagi Grimberg @ 2018-09-27 18:00 UTC (permalink / raw)


Queue deletion is done asynchronously when the last reference on
the queue is dropped. Thus, in order to make sure we don't
over-allocate under a connect/disconnect storm, we let queue
deletion complete before making forward progress.

However, given that we flush the system_wq from rdma_cm context
which runs from a workqueue context, we can have a circular
locking complaint [1]. Fix that by using a private workqueue for
queue deletion.

[1]:
======================================================
WARNING: possible circular locking dependency detected
4.19.0-rc4-dbg+ #3 Not tainted
------------------------------------------------------
kworker/5:0/39 is trying to acquire lock:
00000000a10b6db9 (&id_priv->handler_mutex){+.+.}, at: rdma_destroy_id+0x6f/0x440 [rdma_cm]

but task is already holding lock:
00000000331b4e2c ((work_completion)(&queue->release_work)){+.+.}, at: process_one_work+0x3ed/0xa20

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #3 ((work_completion)(&queue->release_work)){+.+.}:
       process_one_work+0x474/0xa20
       worker_thread+0x63/0x5a0
       kthread+0x1cf/0x1f0
       ret_from_fork+0x24/0x30

-> #2 ((wq_completion)"events"){+.+.}:
       flush_workqueue+0xf3/0x970
       nvmet_rdma_cm_handler+0x133d/0x1734 [nvmet_rdma]
       cma_ib_req_handler+0x72f/0xf90 [rdma_cm]
       cm_process_work+0x2e/0x110 [ib_cm]
       cm_req_handler+0x135b/0x1c30 [ib_cm]
       cm_work_handler+0x2b7/0x38cd [ib_cm]
       process_one_work+0x4ae/0xa20
nvmet_rdma:nvmet_rdma_cm_handler: nvmet_rdma: disconnected (10): status 0 id 0000000040357082
       worker_thread+0x63/0x5a0
       kthread+0x1cf/0x1f0
       ret_from_fork+0x24/0x30
nvme nvme0: Reconnecting in 10 seconds...

-> #1 (&id_priv->handler_mutex/1){+.+.}:
       __mutex_lock+0xfe/0xbe0
       mutex_lock_nested+0x1b/0x20
       cma_ib_req_handler+0x6aa/0xf90 [rdma_cm]
       cm_process_work+0x2e/0x110 [ib_cm]
       cm_req_handler+0x135b/0x1c30 [ib_cm]
       cm_work_handler+0x2b7/0x38cd [ib_cm]
       process_one_work+0x4ae/0xa20
       worker_thread+0x63/0x5a0
       kthread+0x1cf/0x1f0
       ret_from_fork+0x24/0x30

-> #0 (&id_priv->handler_mutex){+.+.}:
       lock_acquire+0xc5/0x200
       __mutex_lock+0xfe/0xbe0
       mutex_lock_nested+0x1b/0x20
       rdma_destroy_id+0x6f/0x440 [rdma_cm]
       nvmet_rdma_release_queue_work+0x8e/0x1b0 [nvmet_rdma]
       process_one_work+0x4ae/0xa20
       worker_thread+0x63/0x5a0
       kthread+0x1cf/0x1f0
       ret_from_fork+0x24/0x30

Fixes: 777dc82395de ("nvmet-rdma: occasionally flush ongoing controller teardown")
Reported-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/rdma.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 3533e918ea37..eb520b3519f0 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -121,6 +121,7 @@ struct nvmet_rdma_device {
 	int			inline_page_count;
 };
 
+struct workqueue_struct *nvmet_rdma_delete_wq;
 static bool nvmet_rdma_use_srq;
 module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444);
 MODULE_PARM_DESC(use_srq, "Use shared receive queue.");
@@ -1244,12 +1245,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 
 	if (queue->host_qid == 0) {
 		/* Let inflight controller teardown complete */
-		flush_scheduled_work();
+		flush_workqueue(nvmet_rdma_delete_wq);
 	}
 
 	ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
 	if (ret) {
-		schedule_work(&queue->release_work);
+		queue_work(nvmet_rdma_delete_wq, &queue->release_work);
 		/* Destroying rdma_cm id is not needed here */
 		return 0;
 	}
@@ -1314,7 +1315,7 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
 
 	if (disconnect) {
 		rdma_disconnect(queue->cm_id);
-		schedule_work(&queue->release_work);
+		queue_work(nvmet_rdma_delete_wq, &queue->release_work);
 	}
 }
 
@@ -1344,7 +1345,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
 	mutex_unlock(&nvmet_rdma_queue_mutex);
 
 	pr_err("failed to connect queue %d\n", queue->idx);
-	schedule_work(&queue->release_work);
+	queue_work(nvmet_rdma_delete_wq, &queue->release_work);
 }
 
 /**
@@ -1626,8 +1627,17 @@ static int __init nvmet_rdma_init(void)
 	if (ret)
 		goto err_ib_client;
 
+	nvmet_rdma_delete_wq = alloc_workqueue("nvmet-rdma-delete-wq",
+			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+	if (!nvmet_rdma_delete_wq) {
+		ret = -ENOMEM;
+		goto err_unreg_transport;
+	}
+
 	return 0;
 
+err_unreg_transport:
+	nvmet_unregister_transport(&nvmet_rdma_ops);
 err_ib_client:
 	ib_unregister_client(&nvmet_rdma_ib_client);
 	return ret;
@@ -1635,6 +1645,7 @@ static int __init nvmet_rdma_init(void)
 
 static void __exit nvmet_rdma_exit(void)
 {
+	destroy_workqueue(nvmet_rdma_delete_wq);
 	nvmet_unregister_transport(&nvmet_rdma_ops);
 	ib_unregister_client(&nvmet_rdma_ib_client);
 	WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list));
-- 
2.17.1


* [PATCH RFC] nvmet-rdma: use a private workqueue for delete
  2018-09-27 18:00 [PATCH RFC] nvmet-rdma: use a private workqueue for delete Sagi Grimberg
@ 2018-09-28 22:14 ` Bart Van Assche
  2018-10-01 20:12   ` Sagi Grimberg
  2018-10-05  7:25 ` Christoph Hellwig
       [not found] ` <CAO+b5-oBVw=-wvnWk1EF=RBaZtjX6bjUG+3WABXbvzX9UTu26w@mail.gmail.com>
  2 siblings, 1 reply; 15+ messages in thread
From: Bart Van Assche @ 2018-09-28 22:14 UTC (permalink / raw)


On Thu, 2018-09-27@11:00 -0700, Sagi Grimberg wrote:
> Queue deletion is done asynchronously when the last reference on
> the queue is dropped. Thus, in order to make sure we don't
> over-allocate under a connect/disconnect storm, we let queue
> deletion complete before making forward progress.
> 
> However, given that we flush the system_wq from rdma_cm context
> which runs from a workqueue context, we can have a circular
> locking complaint [1]. Fix that by using a private workqueue for
> queue deletion.

Hi Sagi,

Thanks for this patch. With this patch applied the warning I reported
earlier disappears but a new warning appeared:

======================================================
WARNING: possible circular locking dependency detected
4.19.0-rc5-dbg+ #8 Not tainted
------------------------------------------------------
kworker/u24:7/977 is trying to acquire lock:
000000003dc8471f (&id_priv->handler_mutex){+.+.}, at: rdma_destroy_id+0x6f/0x440 [rdma_cm]

but task is already holding lock:
0000000086ca5cb1 ((work_completion)(&queue->release_work)){+.+.}, at: process_one_work+0x3ed/0xa20

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #3 ((work_completion)(&queue->release_work)){+.+.}:
       process_one_work+0x474/0xa20
       worker_thread+0x63/0x5a0
       kthread+0x1cf/0x1f0
       ret_from_fork+0x24/0x30

-> #2 ((wq_completion)"nvmet-rdma-delete-wq"){+.+.}:
       flush_workqueue+0xf3/0x970
       nvmet_rdma_cm_handler+0x1319/0x170f [nvmet_rdma]
       cma_ib_req_handler+0x72f/0xf90 [rdma_cm]
       cm_process_work+0x2e/0x110 [ib_cm]
       cm_req_handler+0x135b/0x1c30 [ib_cm]
       cm_work_handler+0x2b7/0x38cd [ib_cm]
       process_one_work+0x4ae/0xa20
       worker_thread+0x63/0x5a0
       kthread+0x1cf/0x1f0
       ret_from_fork+0x24/0x30

-> #1 (&id_priv->handler_mutex/1){+.+.}:
       __mutex_lock+0xfe/0xbe0
       mutex_lock_nested+0x1b/0x20
       cma_ib_req_handler+0x6aa/0xf90 [rdma_cm]
       cm_process_work+0x2e/0x110 [ib_cm]
       cm_req_handler+0x135b/0x1c30 [ib_cm]
       cm_work_handler+0x2b7/0x38cd [ib_cm]
       process_one_work+0x4ae/0xa20
       worker_thread+0x63/0x5a0
       kthread+0x1cf/0x1f0
       ret_from_fork+0x24/0x30

-> #0 (&id_priv->handler_mutex){+.+.}:
       lock_acquire+0xc5/0x200
       __mutex_lock+0xfe/0xbe0
       mutex_lock_nested+0x1b/0x20
       rdma_destroy_id+0x6f/0x440 [rdma_cm]
       nvmet_rdma_release_queue_work+0x8e/0x1b0 [nvmet_rdma]
       process_one_work+0x4ae/0xa20
       worker_thread+0x63/0x5a0
       kthread+0x1cf/0x1f0
       ret_from_fork+0x24/0x30

other info that might help us debug this:

Chain exists of:
  &id_priv->handler_mutex --> (wq_completion)"nvmet-rdma-delete-wq" --> (work_completion)(&queue->release_work)

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock((work_completion)(&queue->release_work));
                               lock((wq_completion)"nvmet-rdma-delete-wq");
                               lock((work_completion)(&queue->release_work));
  lock(&id_priv->handler_mutex);

 *** DEADLOCK ***

2 locks held by kworker/u24:7/977:
 #0: 00000000148450ab ((wq_completion)"nvmet-rdma-delete-wq"){+.+.}, at: process_one_work+0x3ed/0xa20
 #1: 0000000086ca5cb1 ((work_completion)(&queue->release_work)){+.+.}, at: process_one_work+0x3ed/0xa20

stack backtrace:
CPU: 1 PID: 977 Comm: kworker/u24:7 Not tainted 4.19.0-rc5-dbg+ #8
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
Workqueue: nvmet-rdma-delete-wq nvmet_rdma_release_queue_work [nvmet_rdma]
Call Trace:
 dump_stack+0x86/0xc5
 print_circular_bug.isra.32+0x20a/0x218
 __lock_acquire+0x1a54/0x1b20
 lock_acquire+0xc5/0x200
 __mutex_lock+0xfe/0xbe0
 mutex_lock_nested+0x1b/0x20
 rdma_destroy_id+0x6f/0x440 [rdma_cm]
 nvmet_rdma_release_queue_work+0x8e/0x1b0 [nvmet_rdma]
 process_one_work+0x4ae/0xa20
 worker_thread+0x63/0x5a0
 kthread+0x1cf/0x1f0
 ret_from_fork+0x24/0x30


* [PATCH RFC] nvmet-rdma: use a private workqueue for delete
  2018-09-28 22:14 ` Bart Van Assche
@ 2018-10-01 20:12   ` Sagi Grimberg
  2018-10-02 15:02     ` Bart Van Assche
  0 siblings, 1 reply; 15+ messages in thread
From: Sagi Grimberg @ 2018-10-01 20:12 UTC (permalink / raw)



>> Queue deletion is done asynchronously when the last reference on
>> the queue is dropped. Thus, in order to make sure we don't
>> over-allocate under a connect/disconnect storm, we let queue
>> deletion complete before making forward progress.
>>
>> However, given that we flush the system_wq from rdma_cm context
>> which runs from a workqueue context, we can have a circular
>> locking complaint [1]. Fix that by using a private workqueue for
>> queue deletion.
> 
> Hi Sagi,
> 
> Thanks for this patch. With this patch applied the warning I reported
> earlier disappears but a new warning appeared:

Thanks for testing, this is a similar complaint though...

What I'm missing here is why flushing workqueue A can't be done from
a work item that runs on workqueue B. It is guaranteed that the
id_priv that is used as a barrier from rdma_destroy_id is different
from the id_priv that is handling the connect. So I'm not clear yet
on what the dependency is.

Any insights?

> 
> ======================================================
> WARNING: possible circular locking dependency detected
> 4.19.0-rc5-dbg+ #8 Not tainted
> ------------------------------------------------------
> kworker/u24:7/977 is trying to acquire lock:
> 000000003dc8471f (&id_priv->handler_mutex){+.+.}, at: rdma_destroy_id+0x6f/0x440 [rdma_cm]
> 
> but task is already holding lock:
> 0000000086ca5cb1 ((work_completion)(&queue->release_work)){+.+.}, at: process_one_work+0x3ed/0xa20
> 
> which lock already depends on the new lock.
> 
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #3 ((work_completion)(&queue->release_work)){+.+.}:
>         process_one_work+0x474/0xa20
>         worker_thread+0x63/0x5a0
>         kthread+0x1cf/0x1f0
>         ret_from_fork+0x24/0x30
> 
> -> #2 ((wq_completion)"nvmet-rdma-delete-wq"){+.+.}:
>         flush_workqueue+0xf3/0x970
>         nvmet_rdma_cm_handler+0x1319/0x170f [nvmet_rdma]
>         cma_ib_req_handler+0x72f/0xf90 [rdma_cm]
>         cm_process_work+0x2e/0x110 [ib_cm]
>         cm_req_handler+0x135b/0x1c30 [ib_cm]
>         cm_work_handler+0x2b7/0x38cd [ib_cm]
>         process_one_work+0x4ae/0xa20
>         worker_thread+0x63/0x5a0
>         kthread+0x1cf/0x1f0
>         ret_from_fork+0x24/0x30
> 
> -> #1 (&id_priv->handler_mutex/1){+.+.}:
>         __mutex_lock+0xfe/0xbe0
>         mutex_lock_nested+0x1b/0x20
>         cma_ib_req_handler+0x6aa/0xf90 [rdma_cm]
>         cm_process_work+0x2e/0x110 [ib_cm]
>         cm_req_handler+0x135b/0x1c30 [ib_cm]
>         cm_work_handler+0x2b7/0x38cd [ib_cm]
>         process_one_work+0x4ae/0xa20
>         worker_thread+0x63/0x5a0
>         kthread+0x1cf/0x1f0
>         ret_from_fork+0x24/0x30
> 
> -> #0 (&id_priv->handler_mutex){+.+.}:
>         lock_acquire+0xc5/0x200
>         __mutex_lock+0xfe/0xbe0
>         mutex_lock_nested+0x1b/0x20
>         rdma_destroy_id+0x6f/0x440 [rdma_cm]
>         nvmet_rdma_release_queue_work+0x8e/0x1b0 [nvmet_rdma]
>         process_one_work+0x4ae/0xa20
>         worker_thread+0x63/0x5a0
>         kthread+0x1cf/0x1f0
>         ret_from_fork+0x24/0x30
> 
> other info that might help us debug this:
> 
> Chain exists of:
>    &id_priv->handler_mutex --> (wq_completion)"nvmet-rdma-delete-wq" --> (work_completion)(&queue->release_work)
> 
>   Possible unsafe locking scenario:
> 
>         CPU0                    CPU1
>         ----                    ----
>    lock((work_completion)(&queue->release_work));
>                                 lock((wq_completion)"nvmet-rdma-delete-wq");
>                                 lock((work_completion)(&queue->release_work));
>    lock(&id_priv->handler_mutex);
> 
>   *** DEADLOCK ***
> 
> 2 locks held by kworker/u24:7/977:
>   #0: 00000000148450ab ((wq_completion)"nvmet-rdma-delete-wq"){+.+.}, at: process_one_work+0x3ed/0xa20
>   #1: 0000000086ca5cb1 ((work_completion)(&queue->release_work)){+.+.}, at: process_one_work+0x3ed/0xa20
> 
> stack backtrace:
> CPU: 1 PID: 977 Comm: kworker/u24:7 Not tainted 4.19.0-rc5-dbg+ #8
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
> Workqueue: nvmet-rdma-delete-wq nvmet_rdma_release_queue_work [nvmet_rdma]
> Call Trace:
>   dump_stack+0x86/0xc5
>   print_circular_bug.isra.32+0x20a/0x218
>   __lock_acquire+0x1a54/0x1b20
>   lock_acquire+0xc5/0x200
>   __mutex_lock+0xfe/0xbe0
>   mutex_lock_nested+0x1b/0x20
>   rdma_destroy_id+0x6f/0x440 [rdma_cm]
>   nvmet_rdma_release_queue_work+0x8e/0x1b0 [nvmet_rdma]
>   process_one_work+0x4ae/0xa20
>   worker_thread+0x63/0x5a0
>   kthread+0x1cf/0x1f0
>   ret_from_fork+0x24/0x30
> 


* [PATCH RFC] nvmet-rdma: use a private workqueue for delete
  2018-10-01 20:12   ` Sagi Grimberg
@ 2018-10-02 15:02     ` Bart Van Assche
  0 siblings, 0 replies; 15+ messages in thread
From: Bart Van Assche @ 2018-10-02 15:02 UTC (permalink / raw)


On Mon, 2018-10-01@13:12 -0700, Sagi Grimberg wrote:
> > > Queue deletion is done asynchronously when the last reference on
> > > the queue is dropped. Thus, in order to make sure we don't
> > > over-allocate under a connect/disconnect storm, we let queue
> > > deletion complete before making forward progress.
> > > 
> > > However, given that we flush the system_wq from rdma_cm context
> > > which runs from a workqueue context, we can have a circular
> > > locking complaint [1]. Fix that by using a private workqueue for
> > > queue deletion.
> > 
> > Hi Sagi,
> > 
> > Thanks for this patch. With this patch applied the warning I reported
> > earlier disappears but a new warning appeared:
> 
> Thanks for testing, this is a similar complaint though...
> 
> What I'm missing here is why flushing workqueue A can't be done from
> a work item that runs on workqueue B. It is guaranteed that the
> id_priv that is used as a barrier from rdma_destroy_id is different
> from the id_priv that is handling the connect. So I'm not clear yet
> on what the dependency is.
> 
> Any insights?

Hi Sagi,

Further testing showed that the warning shown in my previous e-mail also
occurs without your patch. Since I'm fine with your patch, feel free to add:

Tested-by: Bart Van Assche <bvanassche@acm.org>


* [PATCH RFC] nvmet-rdma: use a private workqueue for delete
  2018-09-27 18:00 [PATCH RFC] nvmet-rdma: use a private workqueue for delete Sagi Grimberg
  2018-09-28 22:14 ` Bart Van Assche
@ 2018-10-05  7:25 ` Christoph Hellwig
       [not found] ` <CAO+b5-oBVw=-wvnWk1EF=RBaZtjX6bjUG+3WABXbvzX9UTu26w@mail.gmail.com>
  2 siblings, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2018-10-05  7:25 UTC (permalink / raw)


Thanks,

applied to nvme-4.20.


* [PATCH RFC] nvmet-rdma: use a private workqueue for delete
       [not found] ` <CAO+b5-oBVw=-wvnWk1EF=RBaZtjX6bjUG+3WABXbvzX9UTu26w@mail.gmail.com>
@ 2018-10-19  1:08   ` Sagi Grimberg
  2018-10-19 16:23     ` Bart Van Assche
  0 siblings, 1 reply; 15+ messages in thread
From: Sagi Grimberg @ 2018-10-19  1:08 UTC (permalink / raw)



> It seems like this has not yet been fixed entirely. This is what appeared
> in the kernel log this morning on my test setup with Christoph's nvme-4.20
> branch (commit cb4bfda62afa ("nvme-pci: fix hot removal during error
> handling")):

There is something I'm missing here, the id_priv->handler_mutex that the
connect context is running on is guaranteed to be different than the
one being removed (a different cm_id) and also the workqueues are
different.

Is it not allowed to flush workqueue A from a work that is hosted on
workqueue B?

> 
> ======================================================
> WARNING: possible circular locking dependency detected
> 4.19.0-rc6-dbg+ #1 Not tainted
> ------------------------------------------------------
> kworker/u16:7/169 is trying to acquire lock:
> 00000000578ccf82 (&id_priv->handler_mutex){+.+.}, at: rdma_destroy_id+0x6f/0x440 [rdma_cm]
> 
> but task is already holding lock:
> 000000005d67271b ((work_completion)(&queue->release_work)){+.+.}, at: process_one_work+0x3ed/0xa20
> 
> which lock already depends on the new lock.
> 
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #3 ((work_completion)(&queue->release_work)){+.+.}:
>        process_one_work+0x474/0xa20
>        worker_thread+0x63/0x5a0
>        kthread+0x1cf/0x1f0
>        ret_from_fork+0x24/0x30
> 
> -> #2 ((wq_completion)"nvmet-rdma-delete-wq"){+.+.}:
>        flush_workqueue+0xf3/0x970
>        nvmet_rdma_cm_handler+0x1319/0x170f [nvmet_rdma]
>        cma_ib_req_handler+0x72f/0xf90 [rdma_cm]
>        cm_process_work+0x2e/0x110 [ib_cm]
>        cm_req_handler+0x135b/0x1c30 [ib_cm]
>        cm_work_handler+0x2b7/0x38cd [ib_cm]
>        process_one_work+0x4ae/0xa20
>        worker_thread+0x63/0x5a0
>        kthread+0x1cf/0x1f0
>        ret_from_fork+0x24/0x30
> 
> -> #1 (&id_priv->handler_mutex/1){+.+.}:
>        __mutex_lock+0xfe/0xbe0
>        mutex_lock_nested+0x1b/0x20
>        cma_ib_req_handler+0x6aa/0xf90 [rdma_cm]
>        cm_process_work+0x2e/0x110 [ib_cm]
>        cm_req_handler+0x135b/0x1c30 [ib_cm]
>        cm_work_handler+0x2b7/0x38cd [ib_cm]
>        process_one_work+0x4ae/0xa20
>        worker_thread+0x63/0x5a0
>        kthread+0x1cf/0x1f0
>        ret_from_fork+0x24/0x30
> 
> -> #0 (&id_priv->handler_mutex){+.+.}:
>        lock_acquire+0xd2/0x210
>        __mutex_lock+0xfe/0xbe0
>        mutex_lock_nested+0x1b/0x20
>        rdma_destroy_id+0x6f/0x440 [rdma_cm]
>        nvmet_rdma_release_queue_work+0x8e/0x1b0 [nvmet_rdma]
>        process_one_work+0x4ae/0xa20
>        worker_thread+0x63/0x5a0
>        kthread+0x1cf/0x1f0
>        ret_from_fork+0x24/0x30
> 
> other info that might help us debug this:
> 
> Chain exists of:
>   &id_priv->handler_mutex --> (wq_completion)"nvmet-rdma-delete-wq" --> (work_completion)(&queue->release_work)
> 
>  Possible unsafe locking scenario:
> 
>        CPU0                    CPU1
>        ----                    ----
>   lock((work_completion)(&queue->release_work));
>                                lock((wq_completion)"nvmet-rdma-delete-wq");
>                                lock((work_completion)(&queue->release_work));
>   lock(&id_priv->handler_mutex);
> 
>  *** DEADLOCK ***
> 
> 2 locks held by kworker/u16:7/169:
>  #0: 00000000a32d4be9 ((wq_completion)"nvmet-rdma-delete-wq"){+.+.}, at: process_one_work+0x3ed/0xa20
>  #1: 000000005d67271b ((work_completion)(&queue->release_work)){+.+.}, at: process_one_work+0x3ed/0xa20
> 
> stack backtrace:
> CPU: 1 PID: 169 Comm: kworker/u16:7 Not tainted 4.19.0-rc6-dbg+ #1
> Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
> Workqueue: nvmet-rdma-delete-wq nvmet_rdma_release_queue_work [nvmet_rdma]
> Call Trace:
>  dump_stack+0xa4/0xf5
>  print_circular_bug.isra.32+0x20a/0x218
>  __lock_acquire+0x1a5e/0x1b20
>  lock_acquire+0xd2/0x210
>  __mutex_lock+0xfe/0xbe0
>  mutex_lock_nested+0x1b/0x20
>  rdma_destroy_id+0x6f/0x440 [rdma_cm]
>  nvmet_rdma_release_queue_work+0x8e/0x1b0 [nvmet_rdma]
>  process_one_work+0x4ae/0xa20
>  worker_thread+0x63/0x5a0
>  kthread+0x1cf/0x1f0
>  ret_from_fork+0x24/0x30


* [PATCH RFC] nvmet-rdma: use a private workqueue for delete
  2018-10-19  1:08   ` Sagi Grimberg
@ 2018-10-19 16:23     ` Bart Van Assche
  2018-10-22  8:56       ` Johannes Berg
  0 siblings, 1 reply; 15+ messages in thread
From: Bart Van Assche @ 2018-10-19 16:23 UTC (permalink / raw)


On Thu, 2018-10-18@18:08 -0700, Sagi Grimberg wrote:
> > It seems like this has not yet been fixed entirely. This is what appeared
> > in the kernel log this morning on my test setup with Christoph's nvme-4.20
> > branch (commit cb4bfda62afa ("nvme-pci: fix hot removal during error
> > handling")):
> 
> There is something I'm missing here, the id_priv->handler_mutex that the
> connect context is running on is guaranteed to be different than the
> one being removed (a different cm_id) and also the workqueues are
> different.
> 
> Is it not allowed to flush workqueue A from a work that is hosted on
> workqueue B?

Hi Tejun and Johannes,

It seems like we ran into a lockdep complaint triggered by a recently queued
patch (87915adc3f0a ("workqueue: re-add lockdep dependencies for flushing")).
However, it's not clear to us whether anything is wrong with the code the
complaint refers to. Can any of you have a look? I have attached the lockep
complaint to this e-mail.

Thanks,

Bart.
-------------- next part --------------
======================================================
WARNING: possible circular locking dependency detected
4.19.0-rc6-dbg+ #1 Not tainted
------------------------------------------------------
kworker/u16:7/169 is trying to acquire lock:
00000000578ccf82 (&id_priv->handler_mutex){+.+.}, at: rdma_destroy_id+0x6f/0x440 [rdma_cm]

but task is already holding lock:
000000005d67271b ((work_completion)(&queue->release_work)){+.+.}, at: process_one_work+0x3ed/0xa20

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #3 ((work_completion)(&queue->release_work)){+.+.}:
       process_one_work+0x474/0xa20
       worker_thread+0x63/0x5a0
       kthread+0x1cf/0x1f0
       ret_from_fork+0x24/0x30

-> #2 ((wq_completion)"nvmet-rdma-delete-wq"){+.+.}:
       flush_workqueue+0xf3/0x970
       nvmet_rdma_cm_handler+0x1319/0x170f [nvmet_rdma]
       cma_ib_req_handler+0x72f/0xf90 [rdma_cm]
       cm_process_work+0x2e/0x110 [ib_cm]
       cm_req_handler+0x135b/0x1c30 [ib_cm]
       cm_work_handler+0x2b7/0x38cd [ib_cm]
       process_one_work+0x4ae/0xa20
       worker_thread+0x63/0x5a0
       kthread+0x1cf/0x1f0
       ret_from_fork+0x24/0x30

-> #1 (&id_priv->handler_mutex/1){+.+.}:
       __mutex_lock+0xfe/0xbe0
       mutex_lock_nested+0x1b/0x20
       cma_ib_req_handler+0x6aa/0xf90 [rdma_cm]
       cm_process_work+0x2e/0x110 [ib_cm]
       cm_req_handler+0x135b/0x1c30 [ib_cm]
       cm_work_handler+0x2b7/0x38cd [ib_cm]
       process_one_work+0x4ae/0xa20
       worker_thread+0x63/0x5a0
       kthread+0x1cf/0x1f0
       ret_from_fork+0x24/0x30

-> #0 (&id_priv->handler_mutex){+.+.}:
       lock_acquire+0xd2/0x210
       __mutex_lock+0xfe/0xbe0
       mutex_lock_nested+0x1b/0x20
       rdma_destroy_id+0x6f/0x440 [rdma_cm]
       nvmet_rdma_release_queue_work+0x8e/0x1b0 [nvmet_rdma]
       process_one_work+0x4ae/0xa20
       worker_thread+0x63/0x5a0
       kthread+0x1cf/0x1f0
       ret_from_fork+0x24/0x30

other info that might help us debug this:

Chain exists of:
  &id_priv->handler_mutex --> (wq_completion)"nvmet-rdma-delete-wq" --> (work_completion)(&queue->release_work)

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock((work_completion)(&queue->release_work));
                               lock((wq_completion)"nvmet-rdma-delete-wq");
                               lock((work_completion)(&queue->release_work));
  lock(&id_priv->handler_mutex);

 *** DEADLOCK ***

2 locks held by kworker/u16:7/169:
 #0: 00000000a32d4be9 ((wq_completion)"nvmet-rdma-delete-wq"){+.+.}, at: process_one_work+0x3ed/0xa20
 #1: 000000005d67271b ((work_completion)(&queue->release_work)){+.+.}, at: process_one_work+0x3ed/0xa20

stack backtrace:
CPU: 1 PID: 169 Comm: kworker/u16:7 Not tainted 4.19.0-rc6-dbg+ #1
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
Workqueue: nvmet-rdma-delete-wq nvmet_rdma_release_queue_work [nvmet_rdma]
Call Trace:
 dump_stack+0xa4/0xf5
 print_circular_bug.isra.32+0x20a/0x218
 __lock_acquire+0x1a5e/0x1b20
 lock_acquire+0xd2/0x210
 __mutex_lock+0xfe/0xbe0
 mutex_lock_nested+0x1b/0x20
 rdma_destroy_id+0x6f/0x440 [rdma_cm]
 nvmet_rdma_release_queue_work+0x8e/0x1b0 [nvmet_rdma]
 process_one_work+0x4ae/0xa20
 worker_thread+0x63/0x5a0
 kthread+0x1cf/0x1f0
 ret_from_fork+0x24/0x30


* [PATCH RFC] nvmet-rdma: use a private workqueue for delete
  2018-10-19 16:23     ` Bart Van Assche
@ 2018-10-22  8:56       ` Johannes Berg
  2018-10-22 21:17         ` Bart Van Assche
  2018-10-23  0:40         ` Sagi Grimberg
  0 siblings, 2 replies; 15+ messages in thread
From: Johannes Berg @ 2018-10-22  8:56 UTC (permalink / raw)


Hi Bart, all,

On Fri, 2018-10-19@16:23 +0000, Bart Van Assche wrote:
> On Thu, 2018-10-18@18:08 -0700, Sagi Grimberg wrote:
> > > It seems like this has not yet been fixed entirely. This is what appeared
> > > in the kernel log this morning on my test setup with Christoph's nvme-4.20
> > > branch (commit cb4bfda62afa ("nvme-pci: fix hot removal during error
> > > handling")):

FWIW, I'm not sure where to find this tree (it's not on kernel.org,
apparently, at least none of hch's?). As a result, I don't have the
correct code here now.

> > There is something I'm missing here, the id_priv->handler_mutex that the
> > connect context is running on is guaranteed to be different than the
> > one being removed (a different cm_id) and also the workqueues are
> > different.
> > 
> > Is it not allowed to flush workqueue A from a work that is hosted on
> > workqueue B?
> 
> It seems like we ran into a lockdep complaint triggered by a recently queued
> patch (87915adc3f0a ("workqueue: re-add lockdep dependencies for flushing")).
> However, it's not clear to us whether anything is wrong with the code the
> complaint refers to. Can any of you have a look? I have attached the lockep
> complaint to this e-mail.

Is it possible that you're just running into the general lockdep
limitation in that it doesn't know anything about *instances*, just
*classes*?

Without looking at the correct code I don't want to try to diagnose this
further, but that's what it seems like?

It's clearly telling you that you're acquiring "id_priv->handler_mutex"
and lists a long dependency chain:

  &id_priv->handler_mutex --> (wq_completion)"nvmet-rdma-delete-wq" --> (work_completion)(&queue->release_work)

so that you shouldn't be trying to acquire this mutex again from running
inside the release work:

  kworker/u16:7/169 is trying to acquire lock:
  00000000578ccf82 (&id_priv->handler_mutex){+.+.}, at: rdma_destroy_id+0x6f/0x440 [rdma_cm]

  but task is already holding lock:
  000000005d67271b ((work_completion)(&queue->release_work)){+.+.}, at: process_one_work+0x3ed/0xa20


This looks like lockdep is working as it should.

If these are indeed (guaranteed to be) different, then I'd say you'd
have to split them into different classes. However, I have no idea what
the rules are for the recursion here, so I can't really say how you
could do that.
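
(As background on why they currently end up in one class -- a hedged
illustration, not actual rdma_cm code: mutex_init() uses one static
lock_class_key per call site, so every mutex initialised at the same
spot shares a single lockdep class, no matter how many instances exist.)

#include <linux/mutex.h>

struct demo_id {                        /* hypothetical stand-in for rdma_id_private */
        struct mutex handler_mutex;
};

static void demo_id_init(struct demo_id *id)
{
        /* every demo_id shares one lockdep class: same mutex_init() call site */
        mutex_init(&id->handler_mutex);
}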

johannes


* [PATCH RFC] nvmet-rdma: use a private workqueue for delete
  2018-10-22  8:56       ` Johannes Berg
@ 2018-10-22 21:17         ` Bart Van Assche
  2018-10-23 19:18           ` Johannes Berg
  2018-10-23  0:40         ` Sagi Grimberg
  1 sibling, 1 reply; 15+ messages in thread
From: Bart Van Assche @ 2018-10-22 21:17 UTC (permalink / raw)


On Mon, 2018-10-22@10:56 +0200, Johannes Berg wrote:
> On Fri, 2018-10-19@16:23 +0000, Bart Van Assche wrote:
> > On Thu, 2018-10-18@18:08 -0700, Sagi Grimberg wrote:
> > > > It seems like this has not yet been fixed entirely. This is what appeared
> > > > in the kernel log this morning on my test setup with Christoph's nvme-4.20
> > > > branch (commit cb4bfda62afa ("nvme-pci: fix hot removal during error
> > > > handling")):
> 
> FWIW, I'm not sure where to find this tree (it's not on kernel.org,
> apparently, at least none of hch's?). As a result, I don't have the
> correct code here now.

Christoph's NVMe tree is available at git://git.infradead.org/nvme.git.

Bart.


* [PATCH RFC] nvmet-rdma: use a private workqueue for delete
  2018-10-22  8:56       ` Johannes Berg
  2018-10-22 21:17         ` Bart Van Assche
@ 2018-10-23  0:40         ` Sagi Grimberg
  2018-10-23 19:22           ` Johannes Berg
  1 sibling, 1 reply; 15+ messages in thread
From: Sagi Grimberg @ 2018-10-23  0:40 UTC (permalink / raw)



>>> There is something I'm missing here, the id_priv->handler_mutex that the
>>> connect context is running on is guaranteed to be different than the
>>> one being removed (a different cm_id) and also the workqueues are
>>> different.
>>>
>>> Is it not allowed to flush workqueue A from a work that is hosted on
>>> workqueue B?
>>
>> It seems like we ran into a lockdep complaint triggered by a recently queued
>> patch (87915adc3f0a ("workqueue: re-add lockdep dependencies for flushing")).
>> However, it's not clear to us whether anything is wrong with the code the
>> complaint refers to. Can any of you have a look? I have attached the lockep
>> complaint to this e-mail.
> 
> Is it possible that you're just running into the general lockdep
> limitation in that it doesn't know anything about *instances*, just
> *classes*?
> 
> Without looking at the correct code I don't want to try to diagnose this
> further, but that's what it seems like?
> 
> It's clearly telling you that you're acquiring "id_priv->handler_mutex"
> and lists a long dependency chain:
> 
>    &id_priv->handler_mutex --> (wq_completion)"nvmet-rdma-delete-wq" --> (work_completion)(&queue->release_work)
> 
> so that you shouldn't be trying to acquire this mutex again from running
> inside the release work:
> 
>    kworker/u16:7/169 is trying to acquire lock:
>    00000000578ccf82 (&id_priv->handler_mutex){+.+.}, at: rdma_destroy_id+0x6f/0x440 [rdma_cm]
> 
>    but task is already holding lock:
>    000000005d67271b ((work_completion)(&queue->release_work)){+.+.}, at: process_one_work+0x3ed/0xa20
> 
> 
> This looks like lockdep is working as it should.
> 
> If these are indeed (guaranteed to be) different, then I'd say you'd
> have to split them into different classes. However, I have no idea what
> the rules are for the recursion here, so I can't really say how you
> could do that.

I'm not sure how I can divide them into classes. The priv is really
an internal structure that surrounds a connection representation.
The priv->handler_mutex wraps the handling of every event dispatched
to the upper layer consumer.

The connection destruction path barriers against event handling by
acquiring and releasing this handler_mutex, such that no events are
handled by the time the connection is destroyed.

See drivers/infiniband/core/cma.c rdma_destroy_id()

In our case, one of the event handlers flushes a workqueue that
is hosting work items that essentially call rdma_destroy_id() on
connections that are guaranteed not to be the one currently handling
the event. So the priv is guaranteed to be a different instance.
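
To make the shape of this concrete, here is a minimal, hypothetical
sketch (every name -- demo_conn, demo_wq, and so on -- is made up; it
only mirrors the structure of the nvmet-rdma/rdma_cm code described
above):

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;        /* like nvmet-rdma-delete-wq */

struct demo_conn {
        struct mutex lock;                      /* like id_priv->handler_mutex */
        struct work_struct release_work;
};

static void demo_release(struct work_struct *w)
{
        struct demo_conn *c = container_of(w, struct demo_conn, release_work);

        /* like rdma_destroy_id() barriering against the dying id's handler */
        mutex_lock(&c->lock);
        mutex_unlock(&c->lock);
        kfree(c);
}

static struct demo_conn *demo_conn_create(void)
{
        struct demo_conn *c = kzalloc(sizeof(*c), GFP_KERNEL);

        if (!c)
                return NULL;
        mutex_init(&c->lock);
        INIT_WORK(&c->release_work, demo_release);
        return c;
}

/* runs from a workqueue, like cma_ib_req_handler() -> nvmet_rdma_cm_handler() */
static void demo_connect(struct demo_conn *incoming, struct demo_conn *dying)
{
        mutex_lock(&incoming->lock);            /* held around the event handler */
        queue_work(demo_wq, &dying->release_work);
        flush_workqueue(demo_wq);               /* wait for inflight teardown */
        mutex_unlock(&incoming->lock);
}

static int __init demo_init(void)
{
        struct demo_conn *a = demo_conn_create();
        struct demo_conn *b = demo_conn_create();

        demo_wq = alloc_workqueue("demo-delete-wq", WQ_UNBOUND, 0);
        if (!a || !b || !demo_wq)
                return -ENOMEM;                 /* leaks on failure; only a sketch */
        demo_connect(a, b);                     /* b is freed by its release work */
        kfree(a);
        return 0;
}

static void __exit demo_exit(void)
{
        destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The two mutexes here are different instances, but both were initialised
at the same mutex_init() call site, so lockdep tracks a single class
for all of them.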


* [PATCH RFC] nvmet-rdma: use a private workqueue for delete
  2018-10-22 21:17         ` Bart Van Assche
@ 2018-10-23 19:18           ` Johannes Berg
  2018-10-23 19:54             ` Bart Van Assche
  0 siblings, 1 reply; 15+ messages in thread
From: Johannes Berg @ 2018-10-23 19:18 UTC (permalink / raw)


On Mon, 2018-10-22@14:17 -0700, Bart Van Assche wrote:
> On Mon, 2018-10-22@10:56 +0200, Johannes Berg wrote:
> > On Fri, 2018-10-19@16:23 +0000, Bart Van Assche wrote:
> > > On Thu, 2018-10-18@18:08 -0700, Sagi Grimberg wrote:
> > > > > It seems like this has not yet been fixed entirely. This is what appeared
> > > > > in the kernel log this morning on my test setup with Christoph's nvme-4.20
> > > > > branch (commit cb4bfda62afa ("nvme-pci: fix hot removal during error
> > > > > handling")):
> > 
> > FWIW, I'm not sure where to find this tree (it's not on kernel.org,
> > apparently, at least none of hch's?). As a result, I don't have the
> > correct code here now.
> 
> Christoph's NVMe tree is available at git://git.infradead.org/nvme.git.

Ok, thanks. I think I'll go off Sagi Grimberg's explanation though
rather than try to understand the code myself.

johannes


* [PATCH RFC] nvmet-rdma: use a private workqueue for delete
  2018-10-23  0:40         ` Sagi Grimberg
@ 2018-10-23 19:22           ` Johannes Berg
  0 siblings, 0 replies; 15+ messages in thread
From: Johannes Berg @ 2018-10-23 19:22 UTC (permalink / raw)


On Mon, 2018-10-22@17:40 -0700, Sagi Grimberg wrote:
> > > > 
> I'm not sure how I can divide them into classes. The priv is really
> an internal structure that surrounds a connection representation.
> The priv->handler_mutex wraps the handling of every event dispatched
> to the upper layer consumer.
> 
> The connection destruction path barriers against event handling by
> acquiring and releasing this handler_mutex, such that no events are
> handled by the time the connection is destroyed.
> 
> See drivers/infiniband/core/cma.c rdma_destroy_id()
> 
> In our case, one of the event handlers flushes a workqueue that
> is hosting work items that essentially call rdma_destroy_id() on
> connections that are guaranteed not to be the one currently handling
> the event. So the priv is guaranteed to be a different instance.

I think the key here would be "are guaranteed not to be the one
currently handling the event". How exactly is that guaranteed?

Is there some sort of static guarantee for this?

But I guess the easiest thing to do would be to use mutex_lock_nested()
in some places here, or add something like flush_workqueue_nested().
That would let you annotate this place in the code, basically saying -
yes, I know I'm doing

CPU0			CPU1
mutex_lock(A)		start_work(B)
flush_work(B)		  mutex_lock(A)

but it's fine because I know, in this specific instance, that it's
really A' not A for the mutex. So if you were to do
	mutex_lock_nested(A, SINGLE_DEPTH_NESTING)
around the flush_work() - since your code knows that the flush_work() is
actually a different item and guaranteed to not build a graph connecting
back to itself, that should work to tell lockdep enough to not complain
here.
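
A rough sketch of what that annotation could look like (hypothetical
names again -- 'incoming' and 'delete_wq' are placeholders, this is
not actual rdma_cm or nvmet code):

	/* the CPU0 side of the example above */
	mutex_lock_nested(&incoming->handler_mutex, SINGLE_DEPTH_NESTING);
	flush_workqueue(delete_wq);	/* only ever destroys *other* ids */
	mutex_unlock(&incoming->handler_mutex);

The subclass is tracked separately from the plain &id_priv->handler_mutex
class taken inside the release work, which is what would keep lockdep
from closing the cycle.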

If this could actually recurse, but not to itself, you'd have to bump
the level up each time - which does get difficult since you're executing
asynchronously with the work queue. Not sure off the top of my head how
I'd solve that.

johannes


* [PATCH RFC] nvmet-rdma: use a private workqueue for delete
  2018-10-23 19:18           ` Johannes Berg
@ 2018-10-23 19:54             ` Bart Van Assche
  2018-10-23 19:59               ` Johannes Berg
  0 siblings, 1 reply; 15+ messages in thread
From: Bart Van Assche @ 2018-10-23 19:54 UTC (permalink / raw)


On Tue, 2018-10-23@21:18 +0200, Johannes Berg wrote:
> On Mon, 2018-10-22@14:17 -0700, Bart Van Assche wrote:
> > On Mon, 2018-10-22@10:56 +0200, Johannes Berg wrote:
> > > On Fri, 2018-10-19@16:23 +0000, Bart Van Assche wrote:
> > > > On Thu, 2018-10-18@18:08 -0700, Sagi Grimberg wrote:
> > > > > > It seems like this has not yet been fixed entirely. This is what appeared
> > > > > > in the kernel log this morning on my test setup with Christoph's nvme-4.20
> > > > > > branch (commit cb4bfda62afa ("nvme-pci: fix hot removal during error
> > > > > > handling")):
> > > 
> > > FWIW, I'm not sure where to find this tree (it's not on kernel.org,
> > > apparently, at least none of hch's?). As a result, I don't have the
> > > correct code here now.
> > 
> > Christoph's NVMe tree is available at git://git.infradead.org/nvme.git.
> 
> Ok, thanks. I think I'll go off Sagi Grimberg's explanation though
> rather than try to understand the code myself.

Are the lockdep annotations in kernel/workqueue.c correct? My understanding
is that lock_map_acquire() should only be used to annotate mutually exclusive
locking (e.g. mutex, spinlock). However, multiple work items associated with
the same workqueue can be executed concurrently. From lockdep.h:

#define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_)
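
For reference, the relevant annotations look roughly like this
(paraphrased from kernel/workqueue.c around v4.19; details elided and
possibly slightly off):

void flush_workqueue(struct workqueue_struct *wq)
{
        ...
        lock_map_acquire(&wq->lockdep_map);
        lock_map_release(&wq->lockdep_map);
        ...
}

static void process_one_work(struct worker *worker, struct work_struct *work)
{
        struct lockdep_map lockdep_map;

        lockdep_copy_map(&lockdep_map, &work->lockdep_map);
        ...
        lock_map_acquire(&pwq->wq->lockdep_map);
        lock_map_acquire(&lockdep_map);
        ...
        worker->current_func(work);
        ...
        lock_map_release(&lockdep_map);
        lock_map_release(&pwq->wq->lockdep_map);
}

So both the workqueue and the individual work item are modelled as
exclusively acquired pseudo-locks, even though work items on the same
workqueue can run concurrently.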

Thanks,

Bart.


* [PATCH RFC] nvmet-rdma: use a private workqueue for delete
  2018-10-23 19:54             ` Bart Van Assche
@ 2018-10-23 19:59               ` Johannes Berg
  2018-10-23 20:00                 ` Johannes Berg
  0 siblings, 1 reply; 15+ messages in thread
From: Johannes Berg @ 2018-10-23 19:59 UTC (permalink / raw)


On Tue, 2018-10-23@12:54 -0700, Bart Van Assche wrote:
> On Tue, 2018-10-23@21:18 +0200, Johannes Berg wrote:
> > On Mon, 2018-10-22@14:17 -0700, Bart Van Assche wrote:
> > > On Mon, 2018-10-22@10:56 +0200, Johannes Berg wrote:
> > > > On Fri, 2018-10-19@16:23 +0000, Bart Van Assche wrote:
> > > > > On Thu, 2018-10-18@18:08 -0700, Sagi Grimberg wrote:
> > > > > > > It seems like this has not yet been fixed entirely. This is what appeared
> > > > > > > in the kernel log this morning on my test setup with Christoph's nvme-4.20
> > > > > > > branch (commit cb4bfda62afa ("nvme-pci: fix hot removal during error
> > > > > > > handling")):
> > > > 
> > > > FWIW, I'm not sure where to find this tree (it's not on kernel.org,
> > > > apparently, at least none of hch's?). As a result, I don't have the
> > > > correct code here now.
> > > 
> > > Christoph's NVMe tree is available at git://git.infradead.org/nvme.git.
> > 
> > Ok, thanks. I think I'll go off Sagi Grimberg's explanation though
> > rather than try to understand the code myself.
> 
> Are the lockdep annotations in kernel/workqueue.c correct? My understanding
> is that lock_map_acquire() should only be used to annotate mutually exclusive
> locking (e.g. mutex, spinlock). However, multiple work items associated with
> the same workqueue can be executed concurrently. From lockdep.h:
> 
> #define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_)

I've talked about this in the other thread, in the interest of keeping
things together, can you ask that again over there?

johannes


* [PATCH RFC] nvmet-rdma: use a private workqueue for delete
  2018-10-23 19:59               ` Johannes Berg
@ 2018-10-23 20:00                 ` Johannes Berg
  0 siblings, 0 replies; 15+ messages in thread
From: Johannes Berg @ 2018-10-23 20:00 UTC (permalink / raw)


On Tue, 2018-10-23@21:59 +0200, Johannes Berg wrote:
> On Tue, 2018-10-23@12:54 -0700, Bart Van Assche wrote:
> > On Tue, 2018-10-23@21:18 +0200, Johannes Berg wrote:
> > > On Mon, 2018-10-22@14:17 -0700, Bart Van Assche wrote:
> > > > On Mon, 2018-10-22@10:56 +0200, Johannes Berg wrote:
> > > > > On Fri, 2018-10-19@16:23 +0000, Bart Van Assche wrote:
> > > > > > On Thu, 2018-10-18@18:08 -0700, Sagi Grimberg wrote:
> > > > > > > > It seems like this has not yet been fixed entirely. This is what appeared
> > > > > > > > in the kernel log this morning on my test setup with Christoph's nvme-4.20
> > > > > > > > branch (commit cb4bfda62afa ("nvme-pci: fix hot removal during error
> > > > > > > > handling")):
> > > > > 
> > > > > FWIW, I'm not sure where to find this tree (it's not on kernel.org,
> > > > > apparently, at least none of hch's?). As a result, I don't have the
> > > > > correct code here now.
> > > > 
> > > > Christoph's NVMe tree is available at git://git.infradead.org/nvme.git.
> > > 
> > > Ok, thanks. I think I'll go off Sagi Grimberg's explanation though
> > > rather than try to understand the code myself.
> > 
> > Are the lockdep annotations in kernel/workqueue.c correct? My understanding
> > is that lock_map_acquire() should only be used to annotate mutually exclusive
> > locking (e.g. mutex, spinlock). However, multiple work items associated with
> > the same workqueue can be executed concurrently. From lockdep.h:
> > 
> > #define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_)
> 
> I've talked about this in the other thread, in the interest of keeping
> things together, can you ask that again over there?

Actually, I'll just go answer over there, no need to repost the
question.

johannes

