Stable Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 1/1] nbd: fix max number of supported devs
@ 2019-08-04 19:10 Mike Christie
  2019-08-05 18:19 ` Sasha Levin
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Mike Christie @ 2019-08-04 19:10 UTC (permalink / raw)
  To: josef, linux-block; +Cc: Mike Christie, stable

This fixes a bug added in 4.10 with commit:

commit 9561a7ade0c205bc2ee035a2ac880478dcc1a024
Author: Josef Bacik <jbacik@fb.com>
Date:   Tue Nov 22 14:04:40 2016 -0500

    nbd: add multi-connection support

that limited the number of devices to 256. Before the patch we could
create 1000s of devices, but the patch switched us from using our
own thread to using a work queue which has a default limit of 256
active works.

The problem is that our recv_work function sits in a loop until
disconnection but only handles IO for one connection. The work is
started when the connection is started/restarted, but if we end up
creating 257 or more connections, the queue_work call just queues
connection257+'s recv_work and that waits for connection 1 - 256's
recv_work to be disconnected and that work instance completing.

Instead of reverting back to kthreads, this has us allocate a
workqueue_struct per device, so we can block in the work.

Cc: stable@vger.kernel.org
Signed-off-by: Mike Christie <mchristi@redhat.com>
---
 drivers/block/nbd.c | 39 +++++++++++++++++++++++++--------------
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 9bcde2325893..cc4b2ae57409 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -108,6 +108,7 @@ struct nbd_device {
 	struct nbd_config *config;
 	struct mutex config_lock;
 	struct gendisk *disk;
+	struct workqueue_struct *recv_workq;
 
 	struct list_head list;
 	struct task_struct *task_recv;
@@ -138,7 +139,6 @@ static struct dentry *nbd_dbg_dir;
 
 static unsigned int nbds_max = 16;
 static int max_part = 16;
-static struct workqueue_struct *recv_workqueue;
 static int part_shift;
 
 static int nbd_dev_dbg_init(struct nbd_device *nbd);
@@ -1036,7 +1036,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
 		/* We take the tx_mutex in an error path in the recv_work, so we
 		 * need to queue_work outside of the tx_mutex.
 		 */
-		queue_work(recv_workqueue, &args->work);
+		queue_work(nbd->recv_workq, &args->work);
 
 		atomic_inc(&config->live_connections);
 		wake_up(&config->conn_wait);
@@ -1137,6 +1137,10 @@ static void nbd_config_put(struct nbd_device *nbd)
 		kfree(nbd->config);
 		nbd->config = NULL;
 
+		if (nbd->recv_workq)
+			destroy_workqueue(nbd->recv_workq);
+		nbd->recv_workq = NULL;
+
 		nbd->tag_set.timeout = 0;
 		nbd->disk->queue->limits.discard_granularity = 0;
 		nbd->disk->queue->limits.discard_alignment = 0;
@@ -1165,6 +1169,14 @@ static int nbd_start_device(struct nbd_device *nbd)
 		return -EINVAL;
 	}
 
+	nbd->recv_workq = alloc_workqueue("knbd%d-recv",
+					  WQ_MEM_RECLAIM | WQ_HIGHPRI |
+					  WQ_UNBOUND, 0, nbd->index);
+	if (!nbd->recv_workq) {
+		dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
+		return -ENOMEM;
+	}
+
 	blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
 	nbd->task_recv = current;
 
@@ -1195,7 +1207,7 @@ static int nbd_start_device(struct nbd_device *nbd)
 		INIT_WORK(&args->work, recv_work);
 		args->nbd = nbd;
 		args->index = i;
-		queue_work(recv_workqueue, &args->work);
+		queue_work(nbd->recv_workq, &args->work);
 	}
 	nbd_size_update(nbd);
 	return error;
@@ -1215,8 +1227,10 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
 	mutex_unlock(&nbd->config_lock);
 	ret = wait_event_interruptible(config->recv_wq,
 					 atomic_read(&config->recv_threads) == 0);
-	if (ret)
+	if (ret) {
 		sock_shutdown(nbd);
+		flush_workqueue(nbd->recv_workq);
+	}
 	mutex_lock(&nbd->config_lock);
 	nbd_bdev_reset(bdev);
 	/* user requested, ignore socket errors */
@@ -1875,6 +1889,12 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
 	nbd_disconnect(nbd);
 	nbd_clear_sock(nbd);
 	mutex_unlock(&nbd->config_lock);
+	/*
+	 * Make sure recv thread has finished, so it does not drop the last
+	 * config ref and try to destroy the workqueue from inside the work
+	 * queue.
+	 */
+	flush_workqueue(nbd->recv_workq);
 	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
 			       &nbd->config->runtime_flags))
 		nbd_config_put(nbd);
@@ -2261,20 +2281,12 @@ static int __init nbd_init(void)
 
 	if (nbds_max > 1UL << (MINORBITS - part_shift))
 		return -EINVAL;
-	recv_workqueue = alloc_workqueue("knbd-recv",
-					 WQ_MEM_RECLAIM | WQ_HIGHPRI |
-					 WQ_UNBOUND, 0);
-	if (!recv_workqueue)
-		return -ENOMEM;
 
-	if (register_blkdev(NBD_MAJOR, "nbd")) {
-		destroy_workqueue(recv_workqueue);
+	if (register_blkdev(NBD_MAJOR, "nbd"))
 		return -EIO;
-	}
 
 	if (genl_register_family(&nbd_genl_family)) {
 		unregister_blkdev(NBD_MAJOR, "nbd");
-		destroy_workqueue(recv_workqueue);
 		return -EINVAL;
 	}
 	nbd_dbg_init();
@@ -2316,7 +2328,6 @@ static void __exit nbd_cleanup(void)
 
 	idr_destroy(&nbd_index_idr);
 	genl_unregister_family(&nbd_genl_family);
-	destroy_workqueue(recv_workqueue);
 	unregister_blkdev(NBD_MAJOR, "nbd");
 }
 
-- 
2.20.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/1] nbd: fix max number of supported devs
  2019-08-04 19:10 [PATCH 1/1] nbd: fix max number of supported devs Mike Christie
@ 2019-08-05 18:19 ` Sasha Levin
  2019-08-13 16:16 ` Josef Bacik
  2019-08-20 18:44 ` Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Sasha Levin @ 2019-08-05 18:19 UTC (permalink / raw)
  To: Sasha Levin; +Cc: Mike Christie, stable

Hi,

[This is an automated email]

This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: all

The bot has tested the following trees: v5.2.6, v4.19.64, v4.14.136, v4.9.187, v4.4.187.

v5.2.6: Build OK!
v4.19.64: Build OK!
v4.14.136: Failed to apply! Possible dependencies:
    2189c97cdbed ("block/ndb: add WQ_UNBOUND to the knbd-recv workqueue")

v4.9.187: Failed to apply! Possible dependencies:
    20032ec38d16 ("nbd: reset the setup task for NBD_CLEAR_SOCK")
    5ea8d10802ec ("nbd: separate out the config information")
    9442b739207a ("nbd: cleanup ioctl handling")
    9561a7ade0c2 ("nbd: add multi-connection support")
    feffa5cc7b47 ("nbd: fix setting of 'error' in NBD_DO_IT ioctl")

v4.4.187: Failed to apply! Possible dependencies:
    0e4f0f6f63d3 ("nbd: Cleanup reset of nbd and bdev after a disconnect")
    1f7b5cf1be43 ("nbd: Timeouts are not user requested disconnects")
    23272a6754b8 ("nbd: Remove signal usage")
    37091fdd831f ("nbd: Create size change events for userspace")
    5ea8d10802ec ("nbd: separate out the config information")
    9561a7ade0c2 ("nbd: add multi-connection support")
    97240963eb30 ("nbd: fix race in ioctl")
    9b4a6ba9185a ("nbd: use flags instead of bool")
    fd8383fd88a2 ("nbd: convert to blkmq")


NOTE: The patch will not be queued to stable trees until it is upstream.

How should we proceed with this patch?

--
Thanks,
Sasha

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/1] nbd: fix max number of supported devs
  2019-08-04 19:10 [PATCH 1/1] nbd: fix max number of supported devs Mike Christie
  2019-08-05 18:19 ` Sasha Levin
@ 2019-08-13 16:16 ` Josef Bacik
  2019-08-20 18:44 ` Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Josef Bacik @ 2019-08-13 16:16 UTC (permalink / raw)
  To: Mike Christie; +Cc: josef, linux-block, stable

On Sun, Aug 04, 2019 at 02:10:06PM -0500, Mike Christie wrote:
> This fixes a bug added in 4.10 with commit:
> 
> commit 9561a7ade0c205bc2ee035a2ac880478dcc1a024
> Author: Josef Bacik <jbacik@fb.com>
> Date:   Tue Nov 22 14:04:40 2016 -0500
> 
>     nbd: add multi-connection support
> 
> that limited the number of devices to 256. Before the patch we could
> create 1000s of devices, but the patch switched us from using our
> own thread to using a work queue which has a default limit of 256
> active works.
> 
> The problem is that our recv_work function sits in a loop until
> disconnection but only handles IO for one connection. The work is
> started when the connection is started/restarted, but if we end up
> creating 257 or more connections, the queue_work call just queues
> connection257+'s recv_work and that waits for connection 1 - 256's
> recv_work to be disconnected and that work instance completing.
> 
> Instead of reverting back to kthreads, this has us allocate a
> workqueue_struct per device, so we can block in the work.

Woops, thanks for fixing this.  Sorry I was out of the office when this went
through and forgot to come back to it.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>

Thanks,

Josef

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/1] nbd: fix max number of supported devs
  2019-08-04 19:10 [PATCH 1/1] nbd: fix max number of supported devs Mike Christie
  2019-08-05 18:19 ` Sasha Levin
  2019-08-13 16:16 ` Josef Bacik
@ 2019-08-20 18:44 ` Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2019-08-20 18:44 UTC (permalink / raw)
  To: Mike Christie, josef, linux-block; +Cc: stable

On 8/4/19 1:10 PM, Mike Christie wrote:
> This fixes a bug added in 4.10 with commit:
> 
> commit 9561a7ade0c205bc2ee035a2ac880478dcc1a024
> Author: Josef Bacik <jbacik@fb.com>
> Date:   Tue Nov 22 14:04:40 2016 -0500
> 
>      nbd: add multi-connection support
> 
> that limited the number of devices to 256. Before the patch we could
> create 1000s of devices, but the patch switched us from using our
> own thread to using a work queue which has a default limit of 256
> active works.
> 
> The problem is that our recv_work function sits in a loop until
> disconnection but only handles IO for one connection. The work is
> started when the connection is started/restarted, but if we end up
> creating 257 or more connections, the queue_work call just queues
> connection257+'s recv_work and that waits for connection 1 - 256's
> recv_work to be disconnected and that work instance completing.
> 
> Instead of reverting back to kthreads, this has us allocate a
> workqueue_struct per device, so we can block in the work.

Applied for 5.4, thanks Mike.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, back to index

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-04 19:10 [PATCH 1/1] nbd: fix max number of supported devs Mike Christie
2019-08-05 18:19 ` Sasha Levin
2019-08-13 16:16 ` Josef Bacik
2019-08-20 18:44 ` Jens Axboe

Stable Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/stable/0 stable/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 stable stable/ https://lore.kernel.org/stable \
		stable@vger.kernel.org stable@archiver.kernel.org
	public-inbox-index stable


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.stable


AGPL code for this site: git clone https://public-inbox.org/ public-inbox