* [PATCH] RDMA/core: Avoid flush_workqueue(system_unbound_wq) usage
From: Tetsuo Handa @ 2022-04-18 13:30 UTC
  To: Jason Gunthorpe, Leon Romanovsky; +Cc: OFED mailing list

Flushing system-wide workqueues is dangerous and will be forbidden.
Replace system_unbound_wq with local ib_unbound_wq.

Link: https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
Note: This patch is only compile tested.

 drivers/infiniband/core/device.c | 25 +++++++++++++++----------
 drivers/infiniband/core/ucma.c   |  2 +-
 drivers/infiniband/hw/mlx5/odp.c |  4 ++--
 include/rdma/ib_verbs.h          |  1 +
 4 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 4deb60a3b43f..75d9f592c6a1 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -58,6 +58,8 @@ struct workqueue_struct *ib_comp_wq;
 struct workqueue_struct *ib_comp_unbound_wq;
 struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
+struct workqueue_struct *ib_unbound_wq;
+EXPORT_SYMBOL_GPL(ib_unbound_wq);
 
 /*
  * Each of the three rwsem locks (devices, clients, client_data) protects the
@@ -1602,7 +1604,7 @@ void ib_unregister_device_queued(struct ib_device *ib_dev)
 	WARN_ON(!refcount_read(&ib_dev->refcount));
 	WARN_ON(!ib_dev->ops.dealloc_driver);
 	get_device(&ib_dev->dev);
-	if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
+	if (!queue_work(ib_unbound_wq, &ib_dev->unregistration_work))
 		put_device(&ib_dev->dev);
 }
 EXPORT_SYMBOL(ib_unregister_device_queued);
@@ -2751,27 +2753,28 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
 
 static int __init ib_core_init(void)
 {
-	int ret;
+	int ret = -ENOMEM;
 
 	ib_wq = alloc_workqueue("infiniband", 0, 0);
 	if (!ib_wq)
 		return -ENOMEM;
 
+	ib_unbound_wq = alloc_workqueue("ib-unb-wq", WQ_UNBOUND,
+					WQ_UNBOUND_MAX_ACTIVE);
+	if (!ib_unbound_wq)
+		goto err;
+
 	ib_comp_wq = alloc_workqueue("ib-comp-wq",
 			WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
-	if (!ib_comp_wq) {
-		ret = -ENOMEM;
-		goto err;
-	}
+	if (!ib_comp_wq)
+		goto err_unbound;
 
 	ib_comp_unbound_wq =
 		alloc_workqueue("ib-comp-unb-wq",
 				WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
 				WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
-	if (!ib_comp_unbound_wq) {
-		ret = -ENOMEM;
+	if (!ib_comp_unbound_wq)
 		goto err_comp;
-	}
 
 	ret = class_register(&ib_class);
 	if (ret) {
@@ -2831,6 +2834,8 @@ static int __init ib_core_init(void)
 	destroy_workqueue(ib_comp_unbound_wq);
 err_comp:
 	destroy_workqueue(ib_comp_wq);
+err_unbound:
+	destroy_workqueue(ib_unbound_wq);
 err:
 	destroy_workqueue(ib_wq);
 	return ret;
@@ -2852,7 +2857,7 @@ static void __exit ib_core_cleanup(void)
 	destroy_workqueue(ib_comp_wq);
 	/* Make sure that any pending umem accounting work is done. */
 	destroy_workqueue(ib_wq);
-	flush_workqueue(system_unbound_wq);
+	destroy_workqueue(ib_unbound_wq);
 	WARN_ON(!xa_empty(&clients));
 	WARN_ON(!xa_empty(&devices));
 }
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 9d6ac9dff39a..65c31c7a3af1 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -360,7 +360,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
 	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
 		xa_lock(&ctx_table);
 		if (xa_load(&ctx_table, ctx->id) == ctx)
-			queue_work(system_unbound_wq, &ctx->close_work);
+			queue_work(ib_unbound_wq, &ctx->close_work);
 		xa_unlock(&ctx_table);
 	}
 	return 0;
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 41c964a45f89..7611a28a4573 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -220,7 +220,7 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
 
 	/* Freeing a MR is a sleeping operation, so bounce to a work queue */
 	INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work);
-	queue_work(system_unbound_wq, &mr->odp_destroy.work);
+	queue_work(ib_unbound_wq, &mr->odp_destroy.work);
 }
 
 static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
@@ -1818,6 +1818,6 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
 		destroy_prefetch_work(work);
 		return rc;
 	}
-	queue_work(system_unbound_wq, &work->work);
+	queue_work(ib_unbound_wq, &work->work);
 	return 0;
 }
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index b0bc9de5e9a8..8be23c03f412 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -54,6 +54,7 @@ struct ib_port;
 struct hw_stats_device_data;
 
 extern struct workqueue_struct *ib_wq;
+extern struct workqueue_struct *ib_unbound_wq;
 extern struct workqueue_struct *ib_comp_wq;
 extern struct workqueue_struct *ib_comp_unbound_wq;
 
-- 
2.32.0



* Re: [PATCH] RDMA/core: Avoid flush_workqueue(system_unbound_wq) usage
From: Jason Gunthorpe @ 2022-04-19 21:01 UTC
  To: Tetsuo Handa; +Cc: Leon Romanovsky, OFED mailing list

On Mon, Apr 18, 2022 at 10:30:47PM +0900, Tetsuo Handa wrote:
> Flushing system-wide workqueues is dangerous and will be forbidden.
> Replace system_unbound_wq with local ib_unbound_wq.

I don't really like this patch; the whole reason to get rid of
flushing the system_wq is the unknowable interactions between work
items on the queue. Creating a subsystem-wide WQ creates the same
problem, just in a microcosm.
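
To make the hazard concrete, here is a minimal sketch (hypothetical
modules A and B sharing a lock; not code from this thread) of how
flushing a shared workqueue can deadlock:

#include <linux/mutex.h>
#include <linux/workqueue.h>

static DEFINE_MUTEX(a_lock);

/* Module B queues this on the shared system_unbound_wq. */
static void b_work_fn(struct work_struct *work)
{
	mutex_lock(&a_lock);	/* blocks while module A holds a_lock */
	/* ... */
	mutex_unlock(&a_lock);
}
static DECLARE_WORK(b_work, b_work_fn);

/* Module A's teardown path. */
static void a_cleanup(void)
{
	mutex_lock(&a_lock);
	/*
	 * Waits for *every* item on the shared queue -- including b_work,
	 * which is itself waiting for a_lock: an AB-BA deadlock through a
	 * queue that neither module owns or can reason about.
	 */
	flush_workqueue(system_unbound_wq);
	mutex_unlock(&a_lock);
}

A subsystem-wide queue shrinks the set of strangers sharing the flush,
but any two unrelated users of it can still interact the same way --
which is the objection being raised here.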

The flush_workqueue() call exists because of this:

> @@ -1602,7 +1604,7 @@ void ib_unregister_device_queued(struct ib_device *ib_dev)
>  	WARN_ON(!refcount_read(&ib_dev->refcount));
>  	WARN_ON(!ib_dev->ops.dealloc_driver);
>  	get_device(&ib_dev->dev);
> -	if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
> +	if (!queue_work(ib_unbound_wq, &ib_dev->unregistration_work))
>  		put_device(&ib_dev->dev);
>  }

This fires off a work item that must complete before the module is
unloaded.
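
A condensed sketch of the ordering this guarantees (hypothetical
timeline, reconstructed from the surrounding discussion):

/*
 * Without a wait in ib_core_cleanup(), unload races with the queued
 * work, whose function lives in ib_core's module text:
 *
 *     rmmod ib_core                     unbound worker thread
 *     -------------                     ---------------------
 *     ib_core_cleanup() returns
 *                                       starts unregistration_work
 *     module text freed          --->   ...now executing freed code
 *
 * flush_workqueue(system_unbound_wq) (the old code) or
 * destroy_workqueue(ib_unbound_wq) (this patch; destroy flushes the
 * queue first) makes rmmod wait until that work item has finished.
 */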

It seems like a big waste to create an entire WQ just for a single
infrequent user like this.

Is there some other solution?

Jason


* [PATCH v2] RDMA/core: Avoid flush_workqueue(system_unbound_wq) usage
From: Tetsuo Handa @ 2022-04-19 22:18 UTC
  To: Jason Gunthorpe; +Cc: Leon Romanovsky, OFED mailing list

On 2022/04/20 6:01, Jason Gunthorpe wrote:
> The flush_workqueue() call exists because of this:
> 
>> @@ -1602,7 +1604,7 @@ void ib_unregister_device_queued(struct ib_device *ib_dev)
>>  	WARN_ON(!refcount_read(&ib_dev->refcount));
>>  	WARN_ON(!ib_dev->ops.dealloc_driver);
>>  	get_device(&ib_dev->dev);
>> -	if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
>> +	if (!queue_work(ib_unbound_wq, &ib_dev->unregistration_work))
>>  		put_device(&ib_dev->dev);
>>  }
> 
> This fires off a work item that must complete before the module is
> unloaded.

I see. Here is an updated patch.

> 
> It seems like a big waste to create an entire WQ just for a single
> infrequent user like this.

No need to worry about wasting resources: creating a WQ without the
WQ_MEM_RECLAIM flag consumes little memory.
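
For reference, a sketch of where the cost difference comes from (queue
names and init function hypothetical): WQ_MEM_RECLAIM pre-allocates a
dedicated rescuer thread; a plain queue allocates only bookkeeping
structures.

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *wq_reclaim, *wq_plain;

static int __init wq_cost_example_init(void)
{
	/*
	 * WQ_MEM_RECLAIM pre-allocates a rescuer kthread so the queue can
	 * make forward progress under memory pressure -- this is the part
	 * of a workqueue that meaningfully costs resources.
	 */
	wq_reclaim = alloc_workqueue("with-rescuer",
				     WQ_UNBOUND | WQ_MEM_RECLAIM, 0);

	/* Without the flag, no extra thread is created; only the small
	 * bookkeeping structs are allocated -- Tetsuo's point above. */
	wq_plain = alloc_workqueue("no-rescuer", WQ_UNBOUND, 0);

	if (!wq_reclaim || !wq_plain) {
		if (wq_reclaim)
			destroy_workqueue(wq_reclaim);
		if (wq_plain)
			destroy_workqueue(wq_plain);
		return -ENOMEM;
	}
	return 0;
}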

> 
> Is there some other solution?

No.

From 731f7f167a95946cac52da6cf5964a2904a9164c Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Wed, 20 Apr 2022 07:10:50 +0900
Subject: [PATCH v2] RDMA/core: Avoid flush_workqueue(system_unbound_wq) usage

Flushing system-wide workqueues is dangerous and will be forbidden.
Replace system_unbound_wq with local ib_unbound_wq.

Link: https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
Changes in v2:
  Use the local ib_unbound_wq (now static) only for the unregistration work that must complete before module unload, as pointed out by Jason Gunthorpe <jgg@ziepe.ca>.

 drivers/infiniband/core/device.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 4deb60a3b43f..f0fdf4dbb458 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -58,6 +58,7 @@ struct workqueue_struct *ib_comp_wq;
 struct workqueue_struct *ib_comp_unbound_wq;
 struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
+static struct workqueue_struct *ib_unbound_wq;
 
 /*
  * Each of the three rwsem locks (devices, clients, client_data) protects the
@@ -1602,7 +1603,7 @@ void ib_unregister_device_queued(struct ib_device *ib_dev)
 	WARN_ON(!refcount_read(&ib_dev->refcount));
 	WARN_ON(!ib_dev->ops.dealloc_driver);
 	get_device(&ib_dev->dev);
-	if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
+	if (!queue_work(ib_unbound_wq, &ib_dev->unregistration_work))
 		put_device(&ib_dev->dev);
 }
 EXPORT_SYMBOL(ib_unregister_device_queued);
@@ -2751,27 +2752,28 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
 
 static int __init ib_core_init(void)
 {
-	int ret;
+	int ret = -ENOMEM;
 
 	ib_wq = alloc_workqueue("infiniband", 0, 0);
 	if (!ib_wq)
 		return -ENOMEM;
 
+	ib_unbound_wq = alloc_workqueue("ib-unb-wq", WQ_UNBOUND,
+					WQ_UNBOUND_MAX_ACTIVE);
+	if (!ib_unbound_wq)
+		goto err;
+
 	ib_comp_wq = alloc_workqueue("ib-comp-wq",
 			WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
-	if (!ib_comp_wq) {
-		ret = -ENOMEM;
-		goto err;
-	}
+	if (!ib_comp_wq)
+		goto err_unbound;
 
 	ib_comp_unbound_wq =
 		alloc_workqueue("ib-comp-unb-wq",
 				WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
 				WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
-	if (!ib_comp_unbound_wq) {
-		ret = -ENOMEM;
+	if (!ib_comp_unbound_wq)
 		goto err_comp;
-	}
 
 	ret = class_register(&ib_class);
 	if (ret) {
@@ -2831,6 +2833,8 @@ static int __init ib_core_init(void)
 	destroy_workqueue(ib_comp_unbound_wq);
 err_comp:
 	destroy_workqueue(ib_comp_wq);
+err_unbound:
+	destroy_workqueue(ib_unbound_wq);
 err:
 	destroy_workqueue(ib_wq);
 	return ret;
@@ -2852,7 +2856,7 @@ static void __exit ib_core_cleanup(void)
 	destroy_workqueue(ib_comp_wq);
 	/* Make sure that any pending umem accounting work is done. */
 	destroy_workqueue(ib_wq);
-	flush_workqueue(system_unbound_wq);
+	destroy_workqueue(ib_unbound_wq);
 	WARN_ON(!xa_empty(&clients));
 	WARN_ON(!xa_empty(&devices));
 }
-- 
2.32.0




* Re: [PATCH v2] RDMA/core: Avoid flush_workqueue(system_unbound_wq) usage
From: Jason Gunthorpe @ 2022-04-25 19:53 UTC
  To: Tetsuo Handa; +Cc: Leon Romanovsky, OFED mailing list


> From 731f7f167a95946cac52da6cf5964a2904a9164c Mon Sep 17 00:00:00 2001
> From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Date: Wed, 20 Apr 2022 07:10:50 +0900
> Subject: [PATCH v2] RDMA/core: Avoid flush_workqueue(system_unbound_wq) usage
> 
> Flushing system-wide workqueues is dangerous and will be forbidden.
> Replace system_unbound_wq with local ib_unbound_wq.
> 
> Link: https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Changes in v2:
>   Use the local ib_unbound_wq (now static) only for the unregistration work that must complete before module unload, as pointed out by Jason Gunthorpe <jgg@ziepe.ca>.
> 
>  drivers/infiniband/core/device.c | 24 ++++++++++++++----------
>  1 file changed, 14 insertions(+), 10 deletions(-)

I renamed it to ib_unreg_wq but otherwise applied, thanks

Jason
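
For reference, the shape that went in, per the note above -- a sketch
reconstructed from the v2 patch plus this rename, not the verbatim
merged diff:

/* In drivers/infiniband/core/device.c; treat as a sketch. */
static struct workqueue_struct *ib_unreg_wq;	/* was ib_unbound_wq in v2 */

/* ib_unregister_device_queued(): queue on the module-local queue. */
	if (!queue_work(ib_unreg_wq, &ib_dev->unregistration_work))
		put_device(&ib_dev->dev);

/* ib_core_cleanup(): destroy_workqueue() flushes before freeing, so
 * the unregistration work is guaranteed to have finished before the
 * module text can go away. */
	destroy_workqueue(ib_unreg_wq);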

