linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/4]nbd: fixes for nbd
@ 2016-05-24 11:26 Pranay Kr. Srivastava
  2016-05-24 11:26 ` [PATCH 1/4] fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
                   ` (5 more replies)
  0 siblings, 6 replies; 48+ messages in thread
From: Pranay Kr. Srivastava @ 2016-05-24 11:26 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel; +Cc: Pranay Kr. Srivastava

This patch series fixes the following

1) fix might_sleep warning on socket shutdown:
   Fix sock_shutdown to avoid calling kernel_sock_shutdown
   while holding spin_lock.

2)fix various coding standard warnings
   Make shutdown get called in a process context instead, using
   system_wq.

3) make nbd device wait for its users.
   When a timeout or error occurs then nbd driver simply kills
   the block device. Many filesystem(s) example ext2/ext3 don't
   expect their buffer heads to disappear like that. Fix this
   by making nbd device wait for its users.

4) use device_attr macros for sysfs attribute
   use DEVICE_ATTR_RO for sysfs pid attribute.

Pranay Kr. Srivastava (4):
  fix might_sleep warning on socket shutdown.
  fix various coding standard warnings
  make nbd device wait for its users.
  use device_attr macros for sysfs attribute

 drivers/block/nbd.c |  150 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 105 insertions(+), 45 deletions(-)

-- 
1.7.9.5

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 1/4] fix might_sleep warning on socket shutdown.
  2016-05-24 11:26 [PATCH 0/4]nbd: fixes for nbd Pranay Kr. Srivastava
@ 2016-05-24 11:26 ` Pranay Kr. Srivastava
  2016-05-30 12:06   ` Markus Pargmann
  2016-05-24 11:26 ` [PATCH 2/4] fix various coding standard warnings Pranay Kr. Srivastava
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 48+ messages in thread
From: Pranay Kr. Srivastava @ 2016-05-24 11:26 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel; +Cc: Pranay Kr. Srivastava

spinlocked ranges should be small and not contain calls into huge
subfunctions. Fix my mistake and just get the pointer to the socket
instead of doing everything with spinlock held.

Reported-by: Mikulas Patocka <mikulas@twibright.com>
Signed-off-by: Markus Pargmann <mpa@pengutronix.de>

Changelog:
Pranay Kr. Srivastava<pranjas@gmail.com>:

1) Use spin_lock instead of irq version for sock_shutdown.

2) Use system work queue to actually trigger the shutdown of
   socket. This solves the issue when kernel_sendmsg is currently
   blocked while a timeout occurs.
Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
---
 drivers/block/nbd.c |   62 +++++++++++++++++++++++++++++----------------------
 1 file changed, 35 insertions(+), 27 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 08afbc7..a5dab48 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -39,6 +39,7 @@
 #include <asm/types.h>
 
 #include <linux/nbd.h>
+#include <linux/workqueue.h>
 
 struct nbd_device {
 	u32 flags;
@@ -57,7 +58,7 @@ struct nbd_device {
 	int blksize;
 	loff_t bytesize;
 	int xmit_timeout;
-	bool timedout;
+	atomic_t timedout;
 	bool disconnect; /* a disconnect has been requested by user */
 
 	struct timer_list timeout_timer;
@@ -69,6 +70,7 @@ struct nbd_device {
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	struct dentry *dbg_dir;
 #endif
+	struct work_struct ws_nbd;
 };
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -94,6 +96,7 @@ static int max_part;
  * Thanks go to Jens Axboe and Al Viro for their LKML emails explaining this!
  */
 static DEFINE_SPINLOCK(nbd_lock);
+static void nbd_work_func(struct work_struct *);
 
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
@@ -172,39 +175,31 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
  */
 static void sock_shutdown(struct nbd_device *nbd)
 {
-	spin_lock_irq(&nbd->sock_lock);
-
-	if (!nbd->sock) {
-		spin_unlock_irq(&nbd->sock_lock);
-		return;
-	}
+	struct socket *sock;
 
-	dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
-	kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
-	sockfd_put(nbd->sock);
+	spin_lock(&nbd->sock_lock);
+	sock = nbd->sock;
 	nbd->sock = NULL;
-	spin_unlock_irq(&nbd->sock_lock);
+	spin_unlock(&nbd->sock_lock);
+
+	if (!sock)
+		return;
 
 	del_timer(&nbd->timeout_timer);
+	dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
+	kernel_sock_shutdown(sock, SHUT_RDWR);
+	sockfd_put(sock);
 }
 
 static void nbd_xmit_timeout(unsigned long arg)
 {
 	struct nbd_device *nbd = (struct nbd_device *)arg;
-	unsigned long flags;
 
 	if (list_empty(&nbd->queue_head))
 		return;
-
-	spin_lock_irqsave(&nbd->sock_lock, flags);
-
-	nbd->timedout = true;
-
-	if (nbd->sock)
-		kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
-
-	spin_unlock_irqrestore(&nbd->sock_lock, flags);
-
+	atomic_inc(&nbd->timedout);
+	schedule_work(&nbd->ws_nbd);
+	wake_up(&nbd->waiting_wq);
 	dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
 }
 
@@ -592,7 +587,11 @@ static int nbd_thread_send(void *data)
 		spin_unlock_irq(&nbd->queue_lock);
 
 		/* handle request */
-		nbd_handle_req(nbd, req);
+		if (atomic_read(&nbd->timedout)) {
+			req->errors++;
+			nbd_end_request(nbd, req);
+		} else
+			nbd_handle_req(nbd, req);
 	}
 
 	nbd->task_send = NULL;
@@ -666,12 +665,13 @@ out:
 static void nbd_reset(struct nbd_device *nbd)
 {
 	nbd->disconnect = false;
-	nbd->timedout = false;
+	atomic_set(&nbd->timedout, 0);
 	nbd->blksize = 1024;
 	nbd->bytesize = 0;
 	set_capacity(nbd->disk, 0);
 	nbd->flags = 0;
 	nbd->xmit_timeout = 0;
+	INIT_WORK(&nbd->ws_nbd, nbd_work_func);
 	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 	del_timer_sync(&nbd->timeout_timer);
 }
@@ -804,16 +804,16 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		nbd_dev_dbg_close(nbd);
 		kthread_stop(thread);
 
-		mutex_lock(&nbd->tx_lock);
-
 		sock_shutdown(nbd);
+		mutex_lock(&nbd->tx_lock);
 		nbd_clear_que(nbd);
 		kill_bdev(bdev);
 		nbd_bdev_reset(bdev);
 
 		if (nbd->disconnect) /* user requested, ignore socket errors */
 			error = 0;
-		if (nbd->timedout)
+
+		if (atomic_read(&nbd->timedout))
 			error = -ETIMEDOUT;
 
 		nbd_reset(nbd);
@@ -863,6 +863,14 @@ static const struct block_device_operations nbd_fops =
 	.compat_ioctl =	nbd_ioctl,
 };
 
+static void nbd_work_func(struct work_struct *ws_nbd)
+{
+	struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
+								ws_nbd);
+
+	sock_shutdown(nbd_dev);
+}
+
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 
 static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
-- 
1.7.9.5

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 2/4] fix various coding standard warnings
  2016-05-24 11:26 [PATCH 0/4]nbd: fixes for nbd Pranay Kr. Srivastava
  2016-05-24 11:26 ` [PATCH 1/4] fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
@ 2016-05-24 11:26 ` Pranay Kr. Srivastava
  2016-05-24 11:26 ` [PATCH 3/4] make nbd device wait for its users Pranay Kr. Srivastava
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 48+ messages in thread
From: Pranay Kr. Srivastava @ 2016-05-24 11:26 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel; +Cc: Pranay Kr. Srivastava

1 )nbd: fix checkpatch trailing space warning.

2) nbd: fix checkpatch warning use linux/uaccess.h

3) nbd : fix checkpatch pointer declaration warning

4) nbd: fix checkpatch warning no newline after decleration.

5) nbd: fix checkpatch warning no newline after decleration.

6) nbd : fix checkpatch line over 80 char warning

7) nbd: fix checkpatch trailing whitespace warning.

8) nbd: fix checkpatch trailing whitespace warning.

9) nbd : fix checkpatch structure declaration braces on next line warning.

10) nbd : fix checkpatch trailing whitespace warning

11) nbd : fix checkpatch printk warning

12) nbd: fix checkpatch no extra line after decleration warning

13) nbd: fix checkpatch printk warning to pr_info

14) nbd: fix checkpatch no new line after decleration warning

15) nbd: fix checkpatch printk warning to pr_info

Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
---
 drivers/block/nbd.c |   29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index a5dab48..af86c9b 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -3,7 +3,7 @@
  *
  * Note that you can not swap over this thing, yet. Seems to work but
  * deadlocks sometimes - you can not swap over TCP in general.
- * 
+ *
  * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
  * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
  *
@@ -35,7 +35,7 @@
 #include <linux/types.h>
 #include <linux/debugfs.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/types.h>
 
 #include <linux/nbd.h>
@@ -43,7 +43,7 @@
 
 struct nbd_device {
 	u32 flags;
-	struct socket * sock;	/* If == NULL, device is not ready, yet	*/
+	struct socket *sock;	/* If == NULL, device is not ready, yet	*/
 	int magic;
 
 	spinlock_t queue_lock;
@@ -261,6 +261,7 @@ static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec,
 {
 	int result;
 	void *kaddr = kmap(bvec->bv_page);
+
 	result = sock_xmit(nbd, 1, kaddr + bvec->bv_offset,
 			   bvec->bv_len, flags);
 	kunmap(bvec->bv_page);
@@ -358,6 +359,7 @@ static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
 {
 	int result;
 	void *kaddr = kmap(bvec->bv_page);
+
 	result = sock_xmit(nbd, 0, kaddr + bvec->bv_offset, bvec->bv_len,
 			MSG_WAITALL);
 	kunmap(bvec->bv_page);
@@ -600,8 +602,8 @@ static int nbd_thread_send(void *data)
 }
 
 /*
- * We always wait for result of write, for now. It would be nice to make it optional
- * in future
+ * We always wait for result of write, for now. It would be nice to make it
+ * optional in future
  * if ((rq_data_dir(req) == WRITE) && (nbd->flags & NBD_WRITE_NOCHK))
  *   { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
  */
@@ -610,7 +612,7 @@ static void nbd_request_handler(struct request_queue *q)
 		__releases(q->queue_lock) __acquires(q->queue_lock)
 {
 	struct request *req;
-	
+
 	while ((req = blk_fetch_request(q)) != NULL) {
 		struct nbd_device *nbd;
 
@@ -729,7 +731,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		nbd_send_req(nbd, &sreq);
 		return 0;
 	}
- 
+
 	case NBD_CLEAR_SOCK:
 		sock_shutdown(nbd);
 		nbd_clear_que(nbd);
@@ -856,8 +858,7 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
 	return error;
 }
 
-static const struct block_device_operations nbd_fops =
-{
+static const struct block_device_operations nbd_fops = {
 	.owner =	THIS_MODULE,
 	.ioctl =	nbd_ioctl,
 	.compat_ioctl =	nbd_ioctl,
@@ -1000,7 +1001,7 @@ static void nbd_dbg_close(void)
 #endif
 
 /*
- * And here should be modules and kernel interface 
+ * And here should be modules and kernel interface
  *  (Just smiley confuses emacs :-)
  */
 
@@ -1013,7 +1014,7 @@ static int __init nbd_init(void)
 	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
 
 	if (max_part < 0) {
-		printk(KERN_ERR "nbd: max_part must be >= 0\n");
+		pr_err("nbd: max_part must be >= 0\n");
 		return -EINVAL;
 	}
 
@@ -1044,6 +1045,7 @@ static int __init nbd_init(void)
 
 	for (i = 0; i < nbds_max; i++) {
 		struct gendisk *disk = alloc_disk(1 << part_shift);
+
 		if (!disk)
 			goto out;
 		nbd_dev[i].disk = disk;
@@ -1074,12 +1076,13 @@ static int __init nbd_init(void)
 		goto out;
 	}
 
-	printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR);
+	pr_info("nbd: registered device at major %d\n", NBD_MAJOR);
 
 	nbd_dbg_init();
 
 	for (i = 0; i < nbds_max; i++) {
 		struct gendisk *disk = nbd_dev[i].disk;
+
 		nbd_dev[i].magic = NBD_MAGIC;
 		INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
 		spin_lock_init(&nbd_dev[i].queue_lock);
@@ -1127,7 +1130,7 @@ static void __exit nbd_cleanup(void)
 	}
 	unregister_blkdev(NBD_MAJOR, "nbd");
 	kfree(nbd_dev);
-	printk(KERN_INFO "nbd: unregistered device at major %d\n", NBD_MAJOR);
+	pr_info("nbd: unregistered device at major %d\n", NBD_MAJOR);
 }
 
 module_init(nbd_init);
-- 
1.7.9.5

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 3/4] make nbd device wait for its users.
  2016-05-24 11:26 [PATCH 0/4]nbd: fixes for nbd Pranay Kr. Srivastava
  2016-05-24 11:26 ` [PATCH 1/4] fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
  2016-05-24 11:26 ` [PATCH 2/4] fix various coding standard warnings Pranay Kr. Srivastava
@ 2016-05-24 11:26 ` Pranay Kr. Srivastava
  2016-05-30 10:44   ` Markus Pargmann
  2016-05-24 11:26 ` [PATCH 4/4] use device_attr macros for sysfs attribute Pranay Kr. Srivastava
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 48+ messages in thread
From: Pranay Kr. Srivastava @ 2016-05-24 11:26 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel; +Cc: Pranay Kr. Srivastava

When a timeout occurs or a recv fails, then
instead of abruplty killing nbd block device
wait for it's users to finish.

This is more required when filesystem(s) like
ext2 or ext3 don't expect their buffer heads to
disappear while the filesystem is mounted.

Use a kref for users using this. The device will
be released for kref count of 2, not less or more.

Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
---
 drivers/block/nbd.c |   51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index af86c9b..59db890 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -71,6 +71,8 @@ struct nbd_device {
 	struct dentry *dbg_dir;
 #endif
 	struct work_struct ws_nbd;
+	struct kref users;
+	struct completion user_completion;
 };
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -674,6 +676,7 @@ static void nbd_reset(struct nbd_device *nbd)
 	nbd->flags = 0;
 	nbd->xmit_timeout = 0;
 	INIT_WORK(&nbd->ws_nbd, nbd_work_func);
+	init_completion(&nbd->user_completion);
 	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 	del_timer_sync(&nbd->timeout_timer);
 }
@@ -807,6 +810,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		kthread_stop(thread);
 
 		sock_shutdown(nbd);
+		wait_for_completion(&nbd->user_completion);
 		mutex_lock(&nbd->tx_lock);
 		nbd_clear_que(nbd);
 		kill_bdev(bdev);
@@ -858,12 +862,58 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
 	return error;
 }
 
+static void nbd_kref_release(struct kref *kref_users)
+{
+	struct nbd_device *nbd = container_of(kref_users, struct nbd_device,
+			users);
+	pr_debug("Releasing kref [%s]\n", __FUNCTION__);
+	complete(&nbd->user_completion);
+
+}
+
+static int nbd_open(struct block_device *bdev, fmode_t mode)
+{
+	struct nbd_device *nbd_dev = bdev->bd_disk->private_data;
+
+	kref_get(&nbd_dev->users);
+	pr_debug("Opening nbd_dev %s. Active users = %u\n",
+			bdev->bd_disk->disk_name,
+			atomic_read(&nbd_dev->users.refcount) - 1);
+	return 0;
+}
+
+static void nbd_release(struct gendisk *disk, fmode_t mode)
+{
+	struct nbd_device *nbd_dev = disk->private_data;
+	/*
+	*kref_init initializes ref count to 1, so we
+	*we check for refcount to be 2 for a final put.
+	*
+	*kref needs to be re-initialized just here as the
+	*other process holding it must see the ref count as 2.
+	*/
+	kref_put(&nbd_dev->users,  nbd_kref_release);
+
+	if (atomic_read(&nbd_dev->users.refcount) == 2) {
+		kref_sub(&nbd_dev->users, 2, nbd_kref_release);
+		kref_init(&nbd_dev->users);
+		kref_get(&nbd_dev->users);
+	}
+
+	pr_debug("Closing nbd_dev %s. Active users = %u\n",
+			disk->disk_name,
+			atomic_read(&nbd_dev->users.refcount) - 1);
+}
+
 static const struct block_device_operations nbd_fops = {
 	.owner =	THIS_MODULE,
 	.ioctl =	nbd_ioctl,
 	.compat_ioctl =	nbd_ioctl,
+	.open = 	nbd_open,
+	.release = 	nbd_release
 };
 
+
 static void nbd_work_func(struct work_struct *ws_nbd)
 {
 	struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
@@ -1098,6 +1148,7 @@ static int __init nbd_init(void)
 		disk->first_minor = i << part_shift;
 		disk->fops = &nbd_fops;
 		disk->private_data = &nbd_dev[i];
+		kref_init(&nbd_dev[i].users);
 		sprintf(disk->disk_name, "nbd%d", i);
 		nbd_reset(&nbd_dev[i]);
 		add_disk(disk);
-- 
1.7.9.5

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 4/4] use device_attr macros for sysfs attribute
  2016-05-24 11:26 [PATCH 0/4]nbd: fixes for nbd Pranay Kr. Srivastava
                   ` (2 preceding siblings ...)
  2016-05-24 11:26 ` [PATCH 3/4] make nbd device wait for its users Pranay Kr. Srivastava
@ 2016-05-24 11:26 ` Pranay Kr. Srivastava
  2016-05-30  3:35 ` [PATCH 0/4]nbd: fixes for nbd Pranay Srivastava
  2016-05-30 12:27 ` Markus Pargmann
  5 siblings, 0 replies; 48+ messages in thread
From: Pranay Kr. Srivastava @ 2016-05-24 11:26 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel; +Cc: Pranay Kr. Srivastava

This patch changes the pid sysfs device attribute to use
DEVICE_ATTR_* macro.

Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
---
 drivers/block/nbd.c |   10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 59db890..d6ab9c9 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -436,10 +436,8 @@ static ssize_t pid_show(struct device *dev,
 	return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
 }
 
-static struct device_attribute pid_attr = {
-	.attr = { .name = "pid", .mode = S_IRUGO},
-	.show = pid_show,
-};
+
+static DEVICE_ATTR_RO(pid);
 
 static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
 {
@@ -452,7 +450,7 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
 
 	nbd->task_recv = current;
 
-	ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
+	ret = device_create_file(disk_to_dev(nbd->disk), &dev_attr_pid);
 	if (ret) {
 		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
 
@@ -475,7 +473,7 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
 
 	nbd_size_clear(nbd, bdev);
 
-	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+	device_remove_file(disk_to_dev(nbd->disk), &dev_attr_pid);
 
 	nbd->task_recv = NULL;
 
-- 
1.7.9.5

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 0/4]nbd: fixes for nbd
  2016-05-24 11:26 [PATCH 0/4]nbd: fixes for nbd Pranay Kr. Srivastava
                   ` (3 preceding siblings ...)
  2016-05-24 11:26 ` [PATCH 4/4] use device_attr macros for sysfs attribute Pranay Kr. Srivastava
@ 2016-05-30  3:35 ` Pranay Srivastava
  2016-05-30 12:27 ` Markus Pargmann
  5 siblings, 0 replies; 48+ messages in thread
From: Pranay Srivastava @ 2016-05-30  3:35 UTC (permalink / raw)
  To: Markus Pargmann, nbd-general, linux-kernel; +Cc: Pranay Kr. Srivastava

Hi

On Tue, May 24, 2016 at 4:56 PM, Pranay Kr. Srivastava
<pranjas@gmail.com> wrote:
> This patch series fixes the following
>
> 1) fix might_sleep warning on socket shutdown:
>    Fix sock_shutdown to avoid calling kernel_sock_shutdown
>    while holding spin_lock.
>
> 2)fix various coding standard warnings
>    Make shutdown get called in a process context instead, using
>    system_wq.
>
> 3) make nbd device wait for its users.
>    When a timeout or error occurs then nbd driver simply kills
>    the block device. Many filesystem(s) example ext2/ext3 don't
>    expect their buffer heads to disappear like that. Fix this
>    by making nbd device wait for its users.
>
> 4) use device_attr macros for sysfs attribute
>    use DEVICE_ATTR_RO for sysfs pid attribute.
>
> Pranay Kr. Srivastava (4):
>   fix might_sleep warning on socket shutdown.
>   fix various coding standard warnings
>   make nbd device wait for its users.
>   use device_attr macros for sysfs attribute
>
>  drivers/block/nbd.c |  150 +++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 105 insertions(+), 45 deletions(-)
>
> --
> 1.7.9.5
>

Can anyone review this?

-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 3/4] make nbd device wait for its users.
  2016-05-24 11:26 ` [PATCH 3/4] make nbd device wait for its users Pranay Kr. Srivastava
@ 2016-05-30 10:44   ` Markus Pargmann
  0 siblings, 0 replies; 48+ messages in thread
From: Markus Pargmann @ 2016-05-30 10:44 UTC (permalink / raw)
  To: Pranay Kr. Srivastava; +Cc: nbd-general, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 4815 bytes --]

Hi,

sorry I couldn't fit the review into last week.

On Tuesday 24 May 2016 14:26:27 Pranay Kr. Srivastava wrote:
> When a timeout occurs or a recv fails, then
> instead of abruplty killing nbd block device
> wait for it's users to finish.
> 
> This is more required when filesystem(s) like
> ext2 or ext3 don't expect their buffer heads to
> disappear while the filesystem is mounted.
> 
> Use a kref for users using this. The device will
> be released for kref count of 2, not less or more.
> 
> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
> ---
>  drivers/block/nbd.c |   51 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 51 insertions(+)
> 
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index af86c9b..59db890 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -71,6 +71,8 @@ struct nbd_device {
>  	struct dentry *dbg_dir;
>  #endif
>  	struct work_struct ws_nbd;
> +	struct kref users;
> +	struct completion user_completion;
>  };
>  
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
> @@ -674,6 +676,7 @@ static void nbd_reset(struct nbd_device *nbd)
>  	nbd->flags = 0;
>  	nbd->xmit_timeout = 0;
>  	INIT_WORK(&nbd->ws_nbd, nbd_work_func);
> +	init_completion(&nbd->user_completion);
>  	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
>  	del_timer_sync(&nbd->timeout_timer);
>  }
> @@ -807,6 +810,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>  		kthread_stop(thread);
>  
>  		sock_shutdown(nbd);
> +		wait_for_completion(&nbd->user_completion);
>  		mutex_lock(&nbd->tx_lock);
>  		nbd_clear_que(nbd);
>  		kill_bdev(bdev);
> @@ -858,12 +862,58 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
>  	return error;
>  }
>  
> +static void nbd_kref_release(struct kref *kref_users)
> +{
> +	struct nbd_device *nbd = container_of(kref_users, struct nbd_device,
> +			users);
> +	pr_debug("Releasing kref [%s]\n", __FUNCTION__);
> +	complete(&nbd->user_completion);
> +
> +}
> +
> +static int nbd_open(struct block_device *bdev, fmode_t mode)
> +{
> +	struct nbd_device *nbd_dev = bdev->bd_disk->private_data;
> +
> +	kref_get(&nbd_dev->users);
> +	pr_debug("Opening nbd_dev %s. Active users = %u\n",
> +			bdev->bd_disk->disk_name,
> +			atomic_read(&nbd_dev->users.refcount) - 1);
> +	return 0;
> +}
> +
> +static void nbd_release(struct gendisk *disk, fmode_t mode)
> +{
> +	struct nbd_device *nbd_dev = disk->private_data;
> +	/*
> +	*kref_init initializes ref count to 1, so we
> +	*we check for refcount to be 2 for a final put.
> +	*
> +	*kref needs to be re-initialized just here as the
> +	*other process holding it must see the ref count as 2.
> +	*/
> +	kref_put(&nbd_dev->users,  nbd_kref_release);
> +
> +	if (atomic_read(&nbd_dev->users.refcount) == 2) {
> +		kref_sub(&nbd_dev->users, 2, nbd_kref_release);
> +		kref_init(&nbd_dev->users);
> +		kref_get(&nbd_dev->users);

Reading the refcount directly seems not to be as it supposed to be.

Why don't you put a kref_init() and kref_put() call into NBD_DO_IT? This
way you don't have to work around the property that kref_init() starts
with a refcount of 1 but you can use it.

For example:
	NBD_DO_IT:
		kref_init()
		...
		kref_put()

	nbd_thread_recv() and nbd_thread_send():
		kref_get()
		...
		kref_put()

	In nbd_open() you could use kref_get_unless_zero() to avoid
	opening a not connected device.

	nbd_release() would then be a very simple kref_put() without
	checking for 2 and so on.

Also there are some checkpatch issues with this patch.

Best Regards,

Markus

> +	}
> +
> +	pr_debug("Closing nbd_dev %s. Active users = %u\n",
> +			disk->disk_name,
> +			atomic_read(&nbd_dev->users.refcount) - 1);
> +}
> +
>  static const struct block_device_operations nbd_fops = {
>  	.owner =	THIS_MODULE,
>  	.ioctl =	nbd_ioctl,
>  	.compat_ioctl =	nbd_ioctl,
> +	.open = 	nbd_open,
> +	.release = 	nbd_release
>  };
>  
> +
>  static void nbd_work_func(struct work_struct *ws_nbd)
>  {
>  	struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
> @@ -1098,6 +1148,7 @@ static int __init nbd_init(void)
>  		disk->first_minor = i << part_shift;
>  		disk->fops = &nbd_fops;
>  		disk->private_data = &nbd_dev[i];
> +		kref_init(&nbd_dev[i].users);
>  		sprintf(disk->disk_name, "nbd%d", i);
>  		nbd_reset(&nbd_dev[i]);
>  		add_disk(disk);
> 

-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 1/4] fix might_sleep warning on socket shutdown.
  2016-05-24 11:26 ` [PATCH 1/4] fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
@ 2016-05-30 12:06   ` Markus Pargmann
  0 siblings, 0 replies; 48+ messages in thread
From: Markus Pargmann @ 2016-05-30 12:06 UTC (permalink / raw)
  To: Pranay Kr. Srivastava; +Cc: nbd-general, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 5804 bytes --]

Hi,

On Tuesday 24 May 2016 14:26:25 Pranay Kr. Srivastava wrote:
> spinlocked ranges should be small and not contain calls into huge
> subfunctions. Fix my mistake and just get the pointer to the socket
> instead of doing everything with spinlock held.
> 
> Reported-by: Mikulas Patocka <mikulas@twibright.com>
> Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
> 
> Changelog:
> Pranay Kr. Srivastava<pranjas@gmail.com>:
> 
> 1) Use spin_lock instead of irq version for sock_shutdown.
> 
> 2) Use system work queue to actually trigger the shutdown of
>    socket. This solves the issue when kernel_sendmsg is currently
>    blocked while a timeout occurs.
> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
> ---
>  drivers/block/nbd.c |   62 +++++++++++++++++++++++++++++----------------------
>  1 file changed, 35 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 08afbc7..a5dab48 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -39,6 +39,7 @@
>  #include <asm/types.h>
>  
>  #include <linux/nbd.h>
> +#include <linux/workqueue.h>
>  
>  struct nbd_device {
>  	u32 flags;
> @@ -57,7 +58,7 @@ struct nbd_device {
>  	int blksize;
>  	loff_t bytesize;
>  	int xmit_timeout;
> -	bool timedout;
> +	atomic_t timedout;

Why are you using atomic here? It seems you are just counting the
occurences of timeouts with this but never actually use that number. 

>  	bool disconnect; /* a disconnect has been requested by user */
>  
>  	struct timer_list timeout_timer;
> @@ -69,6 +70,7 @@ struct nbd_device {
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>  	struct dentry *dbg_dir;
>  #endif
> +	struct work_struct ws_nbd;

Can you rename this so that it is clear that this is a worker struct
specifically for the socket shutdown?

>  };
>  
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
> @@ -94,6 +96,7 @@ static int max_part;
>   * Thanks go to Jens Axboe and Al Viro for their LKML emails explaining this!
>   */
>  static DEFINE_SPINLOCK(nbd_lock);
> +static void nbd_work_func(struct work_struct *);

Same here. nbd_work_func() doesn't give any hint about the real purpose
of the function.

>  
>  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
>  {
> @@ -172,39 +175,31 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
>   */
>  static void sock_shutdown(struct nbd_device *nbd)
>  {
> -	spin_lock_irq(&nbd->sock_lock);
> -
> -	if (!nbd->sock) {
> -		spin_unlock_irq(&nbd->sock_lock);
> -		return;
> -	}
> +	struct socket *sock;
>  
> -	dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
> -	kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
> -	sockfd_put(nbd->sock);
> +	spin_lock(&nbd->sock_lock);
> +	sock = nbd->sock;
>  	nbd->sock = NULL;
> -	spin_unlock_irq(&nbd->sock_lock);
> +	spin_unlock(&nbd->sock_lock);
> +
> +	if (!sock)
> +		return;
>  
>  	del_timer(&nbd->timeout_timer);
> +	dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
> +	kernel_sock_shutdown(sock, SHUT_RDWR);
> +	sockfd_put(sock);
>  }
>  
>  static void nbd_xmit_timeout(unsigned long arg)
>  {
>  	struct nbd_device *nbd = (struct nbd_device *)arg;
> -	unsigned long flags;
>  
>  	if (list_empty(&nbd->queue_head))
>  		return;
> -
> -	spin_lock_irqsave(&nbd->sock_lock, flags);
> -
> -	nbd->timedout = true;
> -
> -	if (nbd->sock)
> -		kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
> -
> -	spin_unlock_irqrestore(&nbd->sock_lock, flags);
> -
> +	atomic_inc(&nbd->timedout);
> +	schedule_work(&nbd->ws_nbd);
> +	wake_up(&nbd->waiting_wq);
>  	dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
>  }
>  
> @@ -592,7 +587,11 @@ static int nbd_thread_send(void *data)
>  		spin_unlock_irq(&nbd->queue_lock);
>  
>  		/* handle request */
> -		nbd_handle_req(nbd, req);
> +		if (atomic_read(&nbd->timedout)) {
> +			req->errors++;
> +			nbd_end_request(nbd, req);
> +		} else
> +			nbd_handle_req(nbd, req);
>  	}
>  
>  	nbd->task_send = NULL;
> @@ -666,12 +665,13 @@ out:
>  static void nbd_reset(struct nbd_device *nbd)
>  {
>  	nbd->disconnect = false;
> -	nbd->timedout = false;
> +	atomic_set(&nbd->timedout, 0);
>  	nbd->blksize = 1024;
>  	nbd->bytesize = 0;
>  	set_capacity(nbd->disk, 0);
>  	nbd->flags = 0;
>  	nbd->xmit_timeout = 0;
> +	INIT_WORK(&nbd->ws_nbd, nbd_work_func);
>  	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
>  	del_timer_sync(&nbd->timeout_timer);
>  }
> @@ -804,16 +804,16 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>  		nbd_dev_dbg_close(nbd);
>  		kthread_stop(thread);
>  
> -		mutex_lock(&nbd->tx_lock);
> -
>  		sock_shutdown(nbd);
> +		mutex_lock(&nbd->tx_lock);
>  		nbd_clear_que(nbd);
>  		kill_bdev(bdev);
>  		nbd_bdev_reset(bdev);
>  
>  		if (nbd->disconnect) /* user requested, ignore socket errors */
>  			error = 0;
> -		if (nbd->timedout)
> +
> +		if (atomic_read(&nbd->timedout))
>  			error = -ETIMEDOUT;
>  
>  		nbd_reset(nbd);
> @@ -863,6 +863,14 @@ static const struct block_device_operations nbd_fops =
>  	.compat_ioctl =	nbd_ioctl,
>  };
>  
> +static void nbd_work_func(struct work_struct *ws_nbd)
> +{
> +	struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
> +								ws_nbd);

Please align line breaks on the opening bracket.

Best Regards,

Markus

-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 0/4]nbd: fixes for nbd
  2016-05-24 11:26 [PATCH 0/4]nbd: fixes for nbd Pranay Kr. Srivastava
                   ` (4 preceding siblings ...)
  2016-05-30  3:35 ` [PATCH 0/4]nbd: fixes for nbd Pranay Srivastava
@ 2016-05-30 12:27 ` Markus Pargmann
  2016-06-02 10:24   ` [PATCH v2 0/5] nbd: " Pranay Kr. Srivastava
  5 siblings, 1 reply; 48+ messages in thread
From: Markus Pargmann @ 2016-05-30 12:27 UTC (permalink / raw)
  To: Pranay Kr. Srivastava; +Cc: nbd-general, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1229 bytes --]

Hi,

On Tuesday 24 May 2016 14:26:24 Pranay Kr. Srivastava wrote:
> This patch series fixes the following
> 
> 1) fix might_sleep warning on socket shutdown:
>    Fix sock_shutdown to avoid calling kernel_sock_shutdown
>    while holding spin_lock.
> 
> 2)fix various coding standard warnings
>    Make shutdown get called in a process context instead, using
>    system_wq.
> 
> 3) make nbd device wait for its users.
>    When a timeout or error occurs then nbd driver simply kills
>    the block device. Many filesystem(s) example ext2/ext3 don't
>    expect their buffer heads to disappear like that. Fix this
>    by making nbd device wait for its users.
> 
> 4) use device_attr macros for sysfs attribute
>    use DEVICE_ATTR_RO for sysfs pid attribute.

Thanks, I applied patches 2 and 4. For the next version please add a
"nbd: " prefix to the other patches.

Best Regards,

Markus

-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH v2 0/5] nbd: fixes for nbd
  2016-05-30 12:27 ` Markus Pargmann
@ 2016-06-02 10:24   ` Pranay Kr. Srivastava
  2016-06-02 10:24     ` [PATCH v2 1/5] nbd: fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
                       ` (5 more replies)
  0 siblings, 6 replies; 48+ messages in thread
From: Pranay Kr. Srivastava @ 2016-06-02 10:24 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel; +Cc: Pranay Kr. Srivastava

This patch series fixes the following

1) fix might_sleep warning on socket shutdown:
   Fix sock_shutdown to avoid calling kernel_sock_shutdown
   while holding spin_lock.

2) cleanup nbd_set_socket
   Cleanup nbd_set_socket to use spin_lock instead of
   irq version and remove the goto statement in favour
   of a simple if-else statement.

3) fix various coding standard warnings
   Make shutdown get called in a process context instead, using
   system_wq.

4) make nbd device wait for its users.
   When a timeout or error occurs then nbd driver simply kills
   the block device. Many filesystem(s) example ext2/ext3 don't
   expect their buffer heads to disappear like that. Fix this
   by making nbd device wait for its users.

   Introduced a new field to check if the device is currently
   in use or not. This helps to check if the kref_put should
   be done on device release or not.
   
   This field needs to be atomic as the release function may
   be called from NBD_DO_IT as well as from device's release
   function.

5) use device_attr macros for sysfs attribute
   use DEVICE_ATTR_RO for sysfs pid attribute.

Changelog for v2:
1) fix might_sleep warning on socket shutdown
   use bool timedout instead of atomic

2) cleanup nbd_set_socket
   Added this new patch to this series.

3) fix various coding standard warnings
   No Change.

4) make nbd device wait for its users
   Earlier version used to do a final kref put when
   the kref->counter == 2. This required a check of
   the internal atomic counter of kref which was ugly.

   v2 of this patch make this more readable and doesn't
   do manual check of the internal counter used by kref.

5) use device_attr macros for sysfs attribute
   No Change.

Pranay Kr. Srivastava (5):
  fix might_sleep warning on socket shutdown.
  cleanup nbd_set_socket
  fix various coding standard warnings
  make nbd device wait for its users.
  use device_attr macros for sysfs attribute

 drivers/block/nbd.c | 173 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 124 insertions(+), 49 deletions(-)

-- 
2.6.2

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH v2 1/5] nbd: fix might_sleep warning on socket shutdown.
  2016-06-02 10:24   ` [PATCH v2 0/5] nbd: " Pranay Kr. Srivastava
@ 2016-06-02 10:24     ` Pranay Kr. Srivastava
  2016-06-09 10:03       ` Pranay Srivastava
  2016-06-14  8:52       ` Markus Pargmann
  2016-06-02 10:24     ` [PATCH v2 2/5]nbd: cleanup nbd_set_socket Pranay Kr. Srivastava
                       ` (4 subsequent siblings)
  5 siblings, 2 replies; 48+ messages in thread
From: Pranay Kr. Srivastava @ 2016-06-02 10:24 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel; +Cc: Pranay Kr. Srivastava

spinlocked ranges should be small and not contain calls into huge
subfunctions. Fix my mistake and just get the pointer to the socket
instead of doing everything with spinlock held.

Reported-by: Mikulas Patocka <mikulas@twibright.com>
Signed-off-by: Markus Pargmann <mpa@pengutronix.de>

Changelog:
Pranay Kr. Srivastava<pranjas@gmail.com>:

1) Use spin_lock instead of irq version for sock_shutdown.

2) Use system work queue to actually trigger the shutdown of
   socket. This solves the issue when kernel_sendmsg is currently
   blocked while a timeout occurs.

Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
---
 drivers/block/nbd.c | 65 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 42 insertions(+), 23 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 31e73a7..0339d40 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -39,6 +39,7 @@
 #include <asm/types.h>
 
 #include <linux/nbd.h>
+#include <linux/workqueue.h>
 
 struct nbd_device {
 	u32 flags;
@@ -69,6 +70,10 @@ struct nbd_device {
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	struct dentry *dbg_dir;
 #endif
+	/*
+	 *This is specifically for calling sock_shutdown, for now.
+	 */
+	struct work_struct ws_shutdown;
 };
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -95,6 +100,11 @@ static int max_part;
  */
 static DEFINE_SPINLOCK(nbd_lock);
 
+/*
+ * Shutdown function for nbd_dev work struct.
+ */
+static void nbd_ws_func_shutdown(struct work_struct *);
+
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
 	return disk_to_dev(nbd->disk);
@@ -172,39 +182,35 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
  */
 static void sock_shutdown(struct nbd_device *nbd)
 {
-	spin_lock_irq(&nbd->sock_lock);
-
-	if (!nbd->sock) {
-		spin_unlock_irq(&nbd->sock_lock);
-		return;
-	}
+	struct socket *sock;
 
-	dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
-	kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
-	sockfd_put(nbd->sock);
+	spin_lock(&nbd->sock_lock);
+	sock = nbd->sock;
 	nbd->sock = NULL;
-	spin_unlock_irq(&nbd->sock_lock);
+	spin_unlock(&nbd->sock_lock);
+
+	if (!sock)
+		return;
 
 	del_timer(&nbd->timeout_timer);
+	dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
+	kernel_sock_shutdown(sock, SHUT_RDWR);
+	sockfd_put(sock);
 }
 
 static void nbd_xmit_timeout(unsigned long arg)
 {
 	struct nbd_device *nbd = (struct nbd_device *)arg;
-	unsigned long flags;
 
 	if (list_empty(&nbd->queue_head))
 		return;
-
-	spin_lock_irqsave(&nbd->sock_lock, flags);
-
 	nbd->timedout = true;
-
-	if (nbd->sock)
-		kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
-
-	spin_unlock_irqrestore(&nbd->sock_lock, flags);
-
+	schedule_work(&nbd->ws_shutdown);
+	/*
+	 * Make sure sender thread sees nbd->timedout.
+	 */
+	smp_wmb();
+	wake_up(&nbd->waiting_wq);
 	dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
 }
 
@@ -592,7 +598,11 @@ static int nbd_thread_send(void *data)
 		spin_unlock_irq(&nbd->queue_lock);
 
 		/* handle request */
-		nbd_handle_req(nbd, req);
+		if (nbd->timedout) {
+			req->errors++;
+			nbd_end_request(nbd, req);
+		} else
+			nbd_handle_req(nbd, req);
 	}
 
 	nbd->task_send = NULL;
@@ -672,6 +682,7 @@ static void nbd_reset(struct nbd_device *nbd)
 	set_capacity(nbd->disk, 0);
 	nbd->flags = 0;
 	nbd->xmit_timeout = 0;
+	INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
 	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 	del_timer_sync(&nbd->timeout_timer);
 }
@@ -804,15 +815,15 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		nbd_dev_dbg_close(nbd);
 		kthread_stop(thread);
 
-		mutex_lock(&nbd->tx_lock);
-
 		sock_shutdown(nbd);
+		mutex_lock(&nbd->tx_lock);
 		nbd_clear_que(nbd);
 		kill_bdev(bdev);
 		nbd_bdev_reset(bdev);
 
 		if (nbd->disconnect) /* user requested, ignore socket errors */
 			error = 0;
+
 		if (nbd->timedout)
 			error = -ETIMEDOUT;
 
@@ -863,6 +874,14 @@ static const struct block_device_operations nbd_fops =
 	.compat_ioctl =	nbd_ioctl,
 };
 
+static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
+{
+	struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
+							ws_shutdown);
+
+	sock_shutdown(nbd_dev);
+}
+
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 
 static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
-- 
2.6.2

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH v2 2/5]nbd: cleanup nbd_set_socket
  2016-06-02 10:24   ` [PATCH v2 0/5] nbd: " Pranay Kr. Srivastava
  2016-06-02 10:24     ` [PATCH v2 1/5] nbd: fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
@ 2016-06-02 10:24     ` Pranay Kr. Srivastava
  2016-06-02 10:24     ` [PATCH v2 3/5]nbd: fix various coding standard warnings Pranay Kr. Srivastava
                       ` (3 subsequent siblings)
  5 siblings, 0 replies; 48+ messages in thread
From: Pranay Kr. Srivastava @ 2016-06-02 10:24 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel; +Cc: Pranay Kr. Srivastava

This patch
1) uses spin_lock instead of irq version.

2) removes the goto statement in case a socket
   is already assigned with simple if-else statement.

Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
---
 drivers/block/nbd.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 0339d40..da2b0a4 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -657,17 +657,14 @@ static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
 {
 	int ret = 0;
 
-	spin_lock_irq(&nbd->sock_lock);
+	spin_lock(&nbd->sock_lock);
 
-	if (nbd->sock) {
+	if (nbd->sock)
 		ret = -EBUSY;
-		goto out;
-	}
-
-	nbd->sock = sock;
+	else
+		nbd->sock = sock;
 
-out:
-	spin_unlock_irq(&nbd->sock_lock);
+	spin_unlock(&nbd->sock_lock);
 
 	return ret;
 }
-- 
2.6.2

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH v2 3/5]nbd: fix various coding standard warnings
  2016-06-02 10:24   ` [PATCH v2 0/5] nbd: " Pranay Kr. Srivastava
  2016-06-02 10:24     ` [PATCH v2 1/5] nbd: fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
  2016-06-02 10:24     ` [PATCH v2 2/5]nbd: cleanup nbd_set_socket Pranay Kr. Srivastava
@ 2016-06-02 10:24     ` Pranay Kr. Srivastava
  2016-06-02 10:25     ` [PATCH v2 4/5]nbd: make nbd device wait for its users Pranay Kr. Srivastava
                       ` (2 subsequent siblings)
  5 siblings, 0 replies; 48+ messages in thread
From: Pranay Kr. Srivastava @ 2016-06-02 10:24 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel; +Cc: Pranay Kr. Srivastava

1 )nbd: fix checkpatch trailing space warning.

2) nbd: fix checkpatch warning use linux/uaccess.h

3) nbd : fix checkpatch pointer declaration warning

4) nbd: fix checkpatch warning no newline after decleration.

5) nbd: fix checkpatch warning no newline after decleration.

6) nbd : fix checkpatch line over 80 char warning

7) nbd: fix checkpatch trailing whitespace warning.

8) nbd: fix checkpatch trailing whitespace warning.

9) nbd : fix checkpatch structure declaration braces on next line warning.

10) nbd : fix checkpatch trailing whitespace warning

11) nbd : fix checkpatch printk warning

12) nbd: fix checkpatch no extra line after decleration warning

13) nbd: fix checkpatch printk warning to pr_info

14) nbd: fix checkpatch no new line after decleration warning

15) nbd: fix checkpatch printk warning to pr_info

Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
---
 drivers/block/nbd.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index da2b0a4..d1d898d 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -3,7 +3,7 @@
  *
  * Note that you can not swap over this thing, yet. Seems to work but
  * deadlocks sometimes - you can not swap over TCP in general.
- * 
+ *
  * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
  * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
  *
@@ -35,7 +35,7 @@
 #include <linux/types.h>
 #include <linux/debugfs.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/types.h>
 
 #include <linux/nbd.h>
@@ -43,7 +43,7 @@
 
 struct nbd_device {
 	u32 flags;
-	struct socket * sock;	/* If == NULL, device is not ready, yet	*/
+	struct socket *sock;	/* If == NULL, device is not ready, yet	*/
 	int magic;
 
 	spinlock_t queue_lock;
@@ -272,6 +272,7 @@ static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec,
 {
 	int result;
 	void *kaddr = kmap(bvec->bv_page);
+
 	result = sock_xmit(nbd, 1, kaddr + bvec->bv_offset,
 			   bvec->bv_len, flags);
 	kunmap(bvec->bv_page);
@@ -369,6 +370,7 @@ static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
 {
 	int result;
 	void *kaddr = kmap(bvec->bv_page);
+
 	result = sock_xmit(nbd, 0, kaddr + bvec->bv_offset, bvec->bv_len,
 			MSG_WAITALL);
 	kunmap(bvec->bv_page);
@@ -611,8 +613,8 @@ static int nbd_thread_send(void *data)
 }
 
 /*
- * We always wait for result of write, for now. It would be nice to make it optional
- * in future
+ * We always wait for result of write, for now. It would be nice to make it
+ * optional in future
  * if ((rq_data_dir(req) == WRITE) && (nbd->flags & NBD_WRITE_NOCHK))
  *   { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
  */
@@ -621,7 +623,7 @@ static void nbd_request_handler(struct request_queue *q)
 		__releases(q->queue_lock) __acquires(q->queue_lock)
 {
 	struct request *req;
-	
+
 	while ((req = blk_fetch_request(q)) != NULL) {
 		struct nbd_device *nbd;
 
@@ -737,7 +739,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		nbd_send_req(nbd, &sreq);
 		return 0;
 	}
- 
+
 	case NBD_CLEAR_SOCK:
 		sock_shutdown(nbd);
 		nbd_clear_que(nbd);
@@ -864,8 +866,7 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
 	return error;
 }
 
-static const struct block_device_operations nbd_fops =
-{
+static const struct block_device_operations nbd_fops = {
 	.owner =	THIS_MODULE,
 	.ioctl =	nbd_ioctl,
 	.compat_ioctl =	nbd_ioctl,
@@ -1008,7 +1009,7 @@ static void nbd_dbg_close(void)
 #endif
 
 /*
- * And here should be modules and kernel interface 
+ * And here should be modules and kernel interface
  *  (Just smiley confuses emacs :-)
  */
 
@@ -1021,7 +1022,7 @@ static int __init nbd_init(void)
 	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
 
 	if (max_part < 0) {
-		printk(KERN_ERR "nbd: max_part must be >= 0\n");
+		pr_err("nbd: max_part must be >= 0\n");
 		return -EINVAL;
 	}
 
@@ -1052,6 +1053,7 @@ static int __init nbd_init(void)
 
 	for (i = 0; i < nbds_max; i++) {
 		struct gendisk *disk = alloc_disk(1 << part_shift);
+
 		if (!disk)
 			goto out;
 		nbd_dev[i].disk = disk;
@@ -1082,12 +1084,13 @@ static int __init nbd_init(void)
 		goto out;
 	}
 
-	printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR);
+	pr_info("nbd: registered device at major %d\n", NBD_MAJOR);
 
 	nbd_dbg_init();
 
 	for (i = 0; i < nbds_max; i++) {
 		struct gendisk *disk = nbd_dev[i].disk;
+
 		nbd_dev[i].magic = NBD_MAGIC;
 		INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
 		spin_lock_init(&nbd_dev[i].queue_lock);
@@ -1135,7 +1138,7 @@ static void __exit nbd_cleanup(void)
 	}
 	unregister_blkdev(NBD_MAJOR, "nbd");
 	kfree(nbd_dev);
-	printk(KERN_INFO "nbd: unregistered device at major %d\n", NBD_MAJOR);
+	pr_info("nbd: unregistered device at major %d\n", NBD_MAJOR);
 }
 
 module_init(nbd_init);
-- 
2.6.2

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH v2 4/5]nbd: make nbd device wait for its users.
  2016-06-02 10:24   ` [PATCH v2 0/5] nbd: " Pranay Kr. Srivastava
                       ` (2 preceding siblings ...)
  2016-06-02 10:24     ` [PATCH v2 3/5]nbd: fix various coding standard warnings Pranay Kr. Srivastava
@ 2016-06-02 10:25     ` Pranay Kr. Srivastava
  2016-06-14  8:59       ` Markus Pargmann
  2016-06-02 10:25     ` [PATCH v2 5/5]nbd: use device_attr macros for sysfs attribute Pranay Kr. Srivastava
       [not found]     ` <CA+aCy1E0S4ofa04xcO9qxQmuipaF5wdnrv3ubSvETn-rBYYisA@mail.gmail.com>
  5 siblings, 1 reply; 48+ messages in thread
From: Pranay Kr. Srivastava @ 2016-06-02 10:25 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel; +Cc: Pranay Kr. Srivastava

When a timeout occurs or a recv fails, then
instead of abruplty killing nbd block device
wait for it's users to finish.

This is more required when filesystem(s) like
ext2 or ext3 don't expect their buffer heads to
disappear while the filesystem is mounted.

Each open of a nbd device is refcounted, while
the userland program [nbd-client] doing the
NBD_DO_IT ioctl would now wait for any other users
of this device before invalidating the nbd device.

Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
---
 drivers/block/nbd.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index d1d898d..4da40dc 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -70,10 +70,13 @@ struct nbd_device {
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	struct dentry *dbg_dir;
 #endif
+	atomic_t inuse;
 	/*
 	 *This is specifically for calling sock_shutdown, for now.
 	 */
 	struct work_struct ws_shutdown;
+	struct kref users;
+	struct completion user_completion;
 };
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -104,6 +107,7 @@ static DEFINE_SPINLOCK(nbd_lock);
  * Shutdown function for nbd_dev work struct.
  */
 static void nbd_ws_func_shutdown(struct work_struct *);
+static void nbd_kref_release(struct kref *);
 
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
@@ -682,6 +686,8 @@ static void nbd_reset(struct nbd_device *nbd)
 	nbd->flags = 0;
 	nbd->xmit_timeout = 0;
 	INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
+	init_completion(&nbd->user_completion);
+	kref_init(&nbd->users);
 	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 	del_timer_sync(&nbd->timeout_timer);
 }
@@ -815,6 +821,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		kthread_stop(thread);
 
 		sock_shutdown(nbd);
+		/*
+		 * kref_init initializes with ref count as 1,
+		 * nbd_client, or the user-land program executing
+		 * this ioctl will make the refcount to 2[at least]
+		 * so subtracting 2 from refcount.
+		 */
+		kref_sub(&nbd->users, 2, nbd_kref_release);
+		wait_for_completion(&nbd->user_completion);
 		mutex_lock(&nbd->tx_lock);
 		nbd_clear_que(nbd);
 		kill_bdev(bdev);
@@ -865,13 +879,56 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
 
 	return error;
 }
+static void nbd_kref_release(struct kref *kref_users)
+{
+	struct nbd_device *nbd = container_of(kref_users, struct nbd_device,
+			users);
+	pr_debug("Releasing kref [%s]\n", __func__);
+	atomic_set(&nbd->inuse, 0);
+	complete(&nbd->user_completion);
+
+}
+
+static int nbd_open(struct block_device *bdev, fmode_t mode)
+{
+	struct nbd_device *nbd_dev = bdev->bd_disk->private_data;
+
+	if (kref_get_unless_zero(&nbd_dev->users))
+		atomic_set(&nbd_dev->inuse, 1);
+
+	pr_debug("Opening nbd_dev %s. Active users = %u\n",
+			bdev->bd_disk->disk_name,
+			atomic_read(&nbd_dev->users.refcount) - 1);
+	return 0;
+}
+
+static void nbd_release(struct gendisk *disk, fmode_t mode)
+{
+	struct nbd_device *nbd_dev = disk->private_data;
+	/*
+	*kref_init initializes ref count to 1, so we
+	*we check for refcount to be 2 for a final put.
+	*
+	*kref needs to be re-initialized just here as the
+	*other process holding it must see the ref count as 2.
+	*/
+	if (atomic_read(&nbd_dev->inuse))
+		kref_put(&nbd_dev->users,  nbd_kref_release);
+
+	pr_debug("Closing nbd_dev %s. Active users = %u\n",
+			disk->disk_name,
+			atomic_read(&nbd_dev->users.refcount) - 1);
+}
 
 static const struct block_device_operations nbd_fops = {
 	.owner =	THIS_MODULE,
 	.ioctl =	nbd_ioctl,
 	.compat_ioctl =	nbd_ioctl,
+	.open =		nbd_open,
+	.release =	nbd_release
 };
 
+
 static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
 {
 	struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
@@ -1107,6 +1164,7 @@ static int __init nbd_init(void)
 		disk->fops = &nbd_fops;
 		disk->private_data = &nbd_dev[i];
 		sprintf(disk->disk_name, "nbd%d", i);
+		atomic_set(&nbd_dev[i].inuse, 0);
 		nbd_reset(&nbd_dev[i]);
 		add_disk(disk);
 	}
-- 
2.6.2

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH v2 5/5]nbd: use device_attr macros for sysfs attribute
  2016-06-02 10:24   ` [PATCH v2 0/5] nbd: " Pranay Kr. Srivastava
                       ` (3 preceding siblings ...)
  2016-06-02 10:25     ` [PATCH v2 4/5]nbd: make nbd device wait for its users Pranay Kr. Srivastava
@ 2016-06-02 10:25     ` Pranay Kr. Srivastava
       [not found]     ` <CA+aCy1E0S4ofa04xcO9qxQmuipaF5wdnrv3ubSvETn-rBYYisA@mail.gmail.com>
  5 siblings, 0 replies; 48+ messages in thread
From: Pranay Kr. Srivastava @ 2016-06-02 10:25 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel; +Cc: Pranay Kr. Srivastava

This patch changes the pid sysfs device attribute to use
DEVICE_ATTR_* macro.

Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
---
 drivers/block/nbd.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 4da40dc..323ab26 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -449,10 +449,8 @@ static ssize_t pid_show(struct device *dev,
 	return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
 }
 
-static struct device_attribute pid_attr = {
-	.attr = { .name = "pid", .mode = S_IRUGO},
-	.show = pid_show,
-};
+
+static DEVICE_ATTR_RO(pid);
 
 static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
 {
@@ -465,7 +463,7 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
 
 	nbd->task_recv = current;
 
-	ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
+	ret = device_create_file(disk_to_dev(nbd->disk), &dev_attr_pid);
 	if (ret) {
 		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
 
@@ -488,7 +486,7 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
 
 	nbd_size_clear(nbd, bdev);
 
-	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+	device_remove_file(disk_to_dev(nbd->disk), &dev_attr_pid);
 
 	nbd->task_recv = NULL;
 
-- 
2.6.2

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 0/4]nbd: fixes for nbd
       [not found]     ` <CA+aCy1E0S4ofa04xcO9qxQmuipaF5wdnrv3ubSvETn-rBYYisA@mail.gmail.com>
@ 2016-06-06 11:07       ` Pranay Srivastava
  0 siblings, 0 replies; 48+ messages in thread
From: Pranay Srivastava @ 2016-06-06 11:07 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel; +Cc: Pranay Kr. Srivastava

This got rejected by mailing-list[due to HTML content] so resending it again.

On Sat, Jun 4, 2016 at 10:25 AM, Pranay Srivastava <pranjas@gmail.com> wrote:
>
>
> On Thursday, June 2, 2016, Pranay Kr. Srivastava <pranjas@gmail.com> wrote:
>> This patch series fixes the following
>>
>> 1) fix might_sleep warning on socket shutdown:
>>    Fix sock_shutdown to avoid calling kernel_sock_shutdown
>>    while holding spin_lock.
>>
>> 2) cleanup nbd_set_socket
>>    Cleanup nbd_set_socket to use spin_lock instead of
>>    irq version and remove the goto statement in favour
>>    of a simple if-else statement.
>>
>> 3) fix various coding standard warnings
>>    Make shutdown get called in a process context instead, using
>>    system_wq.
>>
>> 4) make nbd device wait for its users.
>>    When a timeout or error occurs then nbd driver simply kills
>>    the block device. Many filesystem(s) example ext2/ext3 don't
>>    expect their buffer heads to disappear like that. Fix this
>>    by making nbd device wait for its users.
>>
>>    Introduced a new field to check if the device is currently
>>    in use or not. This helps to check if the kref_put should
>>    be done on device release or not.
>>
>>    This field needs to be atomic as the release function may
>>    be called from NBD_DO_IT as well as from device's release
>>    function.
>>
>> 5) use device_attr macros for sysfs attribute
>>    use DEVICE_ATTR_RO for sysfs pid attribute.
>>
>> Changelog for v2:
>> 1) fix might_sleep warning on socket shutdown
>>    use bool timedout instead of atomic
>>
>> 2) cleanup nbd_set_socket
>>    Added this new patch to this series.
>>
>> 3) fix various coding standard warnings
>>    No Change.
>>
>> 4) make nbd device wait for its users
>>    Earlier version used to do a final kref put when
>>    the kref->counter == 2. This required a check of
>>    the internal atomic counter of kref which was ugly.
>>
>>    v2 of this patch make this more readable and doesn't
>>    do manual check of the internal counter used by kref.
>>
>> 5) use device_attr macros for sysfs attribute
>>    No Change.
>>
>> Pranay Kr. Srivastava (5):
>>   fix might_sleep warning on socket shutdown.
>>   cleanup nbd_set_socket
>>   fix various coding standard warnings
>>   make nbd device wait for its users.
>>   use device_attr macros for sysfs attribute
>>
>>  drivers/block/nbd.c | 173
>> +++++++++++++++++++++++++++++++++++++---------------
>>  1 file changed, 124 insertions(+), 49 deletions(-)
>>
>> --
>> 2.6.2
>>
>>
> Markus can you please review this series.
>
> --
>         ---P.K.S
>

Can anyone kindly review this series?


-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 1/5] nbd: fix might_sleep warning on socket shutdown.
  2016-06-02 10:24     ` [PATCH v2 1/5] nbd: fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
@ 2016-06-09 10:03       ` Pranay Srivastava
  2016-06-14  5:13         ` Pranay Srivastava
  2016-06-14  8:52       ` Markus Pargmann
  1 sibling, 1 reply; 48+ messages in thread
From: Pranay Srivastava @ 2016-06-09 10:03 UTC (permalink / raw)
  To: Markus Pargmann, nbd-general, linux-kernel; +Cc: Pranay Kr. Srivastava

Hello


On Thu, Jun 2, 2016 at 3:54 PM, Pranay Kr. Srivastava <pranjas@gmail.com> wrote:
> spinlocked ranges should be small and not contain calls into huge
> subfunctions. Fix my mistake and just get the pointer to the socket
> instead of doing everything with spinlock held.
>
> Reported-by: Mikulas Patocka <mikulas@twibright.com>
> Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
>
> Changelog:
> Pranay Kr. Srivastava<pranjas@gmail.com>:
>
> 1) Use spin_lock instead of irq version for sock_shutdown.
>
> 2) Use system work queue to actually trigger the shutdown of
>    socket. This solves the issue when kernel_sendmsg is currently
>    blocked while a timeout occurs.
>
> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
> ---
>  drivers/block/nbd.c | 65 ++++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 42 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 31e73a7..0339d40 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -39,6 +39,7 @@
>  #include <asm/types.h>
>
>  #include <linux/nbd.h>
> +#include <linux/workqueue.h>
>
>  struct nbd_device {
>         u32 flags;
> @@ -69,6 +70,10 @@ struct nbd_device {
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>         struct dentry *dbg_dir;
>  #endif
> +       /*
> +        *This is specifically for calling sock_shutdown, for now.
> +        */
> +       struct work_struct ws_shutdown;
>  };
>
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
> @@ -95,6 +100,11 @@ static int max_part;
>   */
>  static DEFINE_SPINLOCK(nbd_lock);
>
> +/*
> + * Shutdown function for nbd_dev work struct.
> + */
> +static void nbd_ws_func_shutdown(struct work_struct *);
> +
>  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
>  {
>         return disk_to_dev(nbd->disk);
> @@ -172,39 +182,35 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
>   */
>  static void sock_shutdown(struct nbd_device *nbd)
>  {
> -       spin_lock_irq(&nbd->sock_lock);
> -
> -       if (!nbd->sock) {
> -               spin_unlock_irq(&nbd->sock_lock);
> -               return;
> -       }
> +       struct socket *sock;
>
> -       dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
> -       kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
> -       sockfd_put(nbd->sock);
> +       spin_lock(&nbd->sock_lock);
> +       sock = nbd->sock;
>         nbd->sock = NULL;
> -       spin_unlock_irq(&nbd->sock_lock);
> +       spin_unlock(&nbd->sock_lock);
> +
> +       if (!sock)
> +               return;
>
>         del_timer(&nbd->timeout_timer);
> +       dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
> +       kernel_sock_shutdown(sock, SHUT_RDWR);
> +       sockfd_put(sock);
>  }
>
>  static void nbd_xmit_timeout(unsigned long arg)
>  {
>         struct nbd_device *nbd = (struct nbd_device *)arg;
> -       unsigned long flags;
>
>         if (list_empty(&nbd->queue_head))
>                 return;
> -
> -       spin_lock_irqsave(&nbd->sock_lock, flags);
> -
>         nbd->timedout = true;
> -
> -       if (nbd->sock)
> -               kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
> -
> -       spin_unlock_irqrestore(&nbd->sock_lock, flags);
> -
> +       schedule_work(&nbd->ws_shutdown);
> +       /*
> +        * Make sure sender thread sees nbd->timedout.
> +        */
> +       smp_wmb();
> +       wake_up(&nbd->waiting_wq);
>         dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
>  }
>
> @@ -592,7 +598,11 @@ static int nbd_thread_send(void *data)
>                 spin_unlock_irq(&nbd->queue_lock);
>
>                 /* handle request */
> -               nbd_handle_req(nbd, req);
> +               if (nbd->timedout) {
> +                       req->errors++;
> +                       nbd_end_request(nbd, req);
> +               } else
> +                       nbd_handle_req(nbd, req);
>         }
>
>         nbd->task_send = NULL;
> @@ -672,6 +682,7 @@ static void nbd_reset(struct nbd_device *nbd)
>         set_capacity(nbd->disk, 0);
>         nbd->flags = 0;
>         nbd->xmit_timeout = 0;
> +       INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
>         queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
>         del_timer_sync(&nbd->timeout_timer);
>  }
> @@ -804,15 +815,15 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>                 nbd_dev_dbg_close(nbd);
>                 kthread_stop(thread);
>
> -               mutex_lock(&nbd->tx_lock);
> -
>                 sock_shutdown(nbd);
> +               mutex_lock(&nbd->tx_lock);
>                 nbd_clear_que(nbd);
>                 kill_bdev(bdev);
>                 nbd_bdev_reset(bdev);
>
>                 if (nbd->disconnect) /* user requested, ignore socket errors */
>                         error = 0;
> +
>                 if (nbd->timedout)
>                         error = -ETIMEDOUT;
>
> @@ -863,6 +874,14 @@ static const struct block_device_operations nbd_fops =
>         .compat_ioctl = nbd_ioctl,
>  };
>
> +static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
> +{
> +       struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
> +                                                       ws_shutdown);
> +
> +       sock_shutdown(nbd_dev);
> +}
> +
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>
>  static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
> --
> 2.6.2
>


Any update for the above patch series?
-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 1/5] nbd: fix might_sleep warning on socket shutdown.
  2016-06-09 10:03       ` Pranay Srivastava
@ 2016-06-14  5:13         ` Pranay Srivastava
  0 siblings, 0 replies; 48+ messages in thread
From: Pranay Srivastava @ 2016-06-14  5:13 UTC (permalink / raw)
  To: Markus Pargmann, nbd-general, linux-kernel; +Cc: Pranay Kr. Srivastava

Hello,

On Thu, Jun 9, 2016 at 3:33 PM, Pranay Srivastava <pranjas@gmail.com> wrote:
>
> Hello
>
>
> On Thu, Jun 2, 2016 at 3:54 PM, Pranay Kr. Srivastava <pranjas@gmail.com> wrote:
> > spinlocked ranges should be small and not contain calls into huge
> > subfunctions. Fix my mistake and just get the pointer to the socket
> > instead of doing everything with spinlock held.
> >
> > Reported-by: Mikulas Patocka <mikulas@twibright.com>
> > Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
> >
> > Changelog:
> > Pranay Kr. Srivastava<pranjas@gmail.com>:
> >
> > 1) Use spin_lock instead of irq version for sock_shutdown.
> >
> > 2) Use system work queue to actually trigger the shutdown of
> >    socket. This solves the issue when kernel_sendmsg is currently
> >    blocked while a timeout occurs.
> >
> > Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
> > ---
> >  drivers/block/nbd.c | 65 ++++++++++++++++++++++++++++++++++-------------------
> >  1 file changed, 42 insertions(+), 23 deletions(-)
> >
> > diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> > index 31e73a7..0339d40 100644
> > --- a/drivers/block/nbd.c
> > +++ b/drivers/block/nbd.c
> > @@ -39,6 +39,7 @@
> >  #include <asm/types.h>
> >
> >  #include <linux/nbd.h>
> > +#include <linux/workqueue.h>
> >
> >  struct nbd_device {
> >         u32 flags;
> > @@ -69,6 +70,10 @@ struct nbd_device {
> >  #if IS_ENABLED(CONFIG_DEBUG_FS)
> >         struct dentry *dbg_dir;
> >  #endif
> > +       /*
> > +        *This is specifically for calling sock_shutdown, for now.
> > +        */
> > +       struct work_struct ws_shutdown;
> >  };
> >
> >  #if IS_ENABLED(CONFIG_DEBUG_FS)
> > @@ -95,6 +100,11 @@ static int max_part;
> >   */
> >  static DEFINE_SPINLOCK(nbd_lock);
> >
> > +/*
> > + * Shutdown function for nbd_dev work struct.
> > + */
> > +static void nbd_ws_func_shutdown(struct work_struct *);
> > +
> >  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
> >  {
> >         return disk_to_dev(nbd->disk);
> > @@ -172,39 +182,35 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
> >   */
> >  static void sock_shutdown(struct nbd_device *nbd)
> >  {
> > -       spin_lock_irq(&nbd->sock_lock);
> > -
> > -       if (!nbd->sock) {
> > -               spin_unlock_irq(&nbd->sock_lock);
> > -               return;
> > -       }
> > +       struct socket *sock;
> >
> > -       dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
> > -       kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
> > -       sockfd_put(nbd->sock);
> > +       spin_lock(&nbd->sock_lock);
> > +       sock = nbd->sock;
> >         nbd->sock = NULL;
> > -       spin_unlock_irq(&nbd->sock_lock);
> > +       spin_unlock(&nbd->sock_lock);
> > +
> > +       if (!sock)
> > +               return;
> >
> >         del_timer(&nbd->timeout_timer);
> > +       dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
> > +       kernel_sock_shutdown(sock, SHUT_RDWR);
> > +       sockfd_put(sock);
> >  }
> >
> >  static void nbd_xmit_timeout(unsigned long arg)
> >  {
> >         struct nbd_device *nbd = (struct nbd_device *)arg;
> > -       unsigned long flags;
> >
> >         if (list_empty(&nbd->queue_head))
> >                 return;
> > -
> > -       spin_lock_irqsave(&nbd->sock_lock, flags);
> > -
> >         nbd->timedout = true;
> > -
> > -       if (nbd->sock)
> > -               kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
> > -
> > -       spin_unlock_irqrestore(&nbd->sock_lock, flags);
> > -
> > +       schedule_work(&nbd->ws_shutdown);
> > +       /*
> > +        * Make sure sender thread sees nbd->timedout.
> > +        */
> > +       smp_wmb();
> > +       wake_up(&nbd->waiting_wq);
> >         dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
> >  }
> >
> > @@ -592,7 +598,11 @@ static int nbd_thread_send(void *data)
> >                 spin_unlock_irq(&nbd->queue_lock);
> >
> >                 /* handle request */
> > -               nbd_handle_req(nbd, req);
> > +               if (nbd->timedout) {
> > +                       req->errors++;
> > +                       nbd_end_request(nbd, req);
> > +               } else
> > +                       nbd_handle_req(nbd, req);
> >         }
> >
> >         nbd->task_send = NULL;
> > @@ -672,6 +682,7 @@ static void nbd_reset(struct nbd_device *nbd)
> >         set_capacity(nbd->disk, 0);
> >         nbd->flags = 0;
> >         nbd->xmit_timeout = 0;
> > +       INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
> >         queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
> >         del_timer_sync(&nbd->timeout_timer);
> >  }
> > @@ -804,15 +815,15 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
> >                 nbd_dev_dbg_close(nbd);
> >                 kthread_stop(thread);
> >
> > -               mutex_lock(&nbd->tx_lock);
> > -
> >                 sock_shutdown(nbd);
> > +               mutex_lock(&nbd->tx_lock);
> >                 nbd_clear_que(nbd);
> >                 kill_bdev(bdev);
> >                 nbd_bdev_reset(bdev);
> >
> >                 if (nbd->disconnect) /* user requested, ignore socket errors */
> >                         error = 0;
> > +
> >                 if (nbd->timedout)
> >                         error = -ETIMEDOUT;
> >
> > @@ -863,6 +874,14 @@ static const struct block_device_operations nbd_fops =
> >         .compat_ioctl = nbd_ioctl,
> >  };
> >
> > +static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
> > +{
> > +       struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
> > +                                                       ws_shutdown);
> > +
> > +       sock_shutdown(nbd_dev);
> > +}
> > +
> >  #if IS_ENABLED(CONFIG_DEBUG_FS)
> >
> >  static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
> > --
> > 2.6.2
> >
>
>
> Any update for the above patch series?
> --
>         ---P.K.S

ping??



-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 1/5] nbd: fix might_sleep warning on socket shutdown.
  2016-06-02 10:24     ` [PATCH v2 1/5] nbd: fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
  2016-06-09 10:03       ` Pranay Srivastava
@ 2016-06-14  8:52       ` Markus Pargmann
  2016-06-14  9:50         ` Pranay Srivastava
  2016-06-24 10:09         ` [PATCH v3 0/3] nbd: resolve bugs and limitations Pranay Kr. Srivastava
  1 sibling, 2 replies; 48+ messages in thread
From: Markus Pargmann @ 2016-06-14  8:52 UTC (permalink / raw)
  To: Pranay Kr. Srivastava; +Cc: nbd-general, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 5630 bytes --]

Hi,

On Thursday 02 June 2016 13:24:57 Pranay Kr. Srivastava wrote:
> spinlocked ranges should be small and not contain calls into huge
> subfunctions. Fix my mistake and just get the pointer to the socket
> instead of doing everything with spinlock held.
> 
> Reported-by: Mikulas Patocka <mikulas@twibright.com>
> Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
> 
> Changelog:
> Pranay Kr. Srivastava<pranjas@gmail.com>:
> 
> 1) Use spin_lock instead of irq version for sock_shutdown.
> 
> 2) Use system work queue to actually trigger the shutdown of
>    socket. This solves the issue when kernel_sendmsg is currently
>    blocked while a timeout occurs.
> 
> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>

This looks better. Some smaller things inline. Also this patch does not
apply on my tree anymore. Can you please rebase it onto:
	http://git.pengutronix.de/?p=mpa/linux-nbd.git;a=shortlog;h=refs/heads/master

> ---
>  drivers/block/nbd.c | 65 ++++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 42 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 31e73a7..0339d40 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -39,6 +39,7 @@
>  #include <asm/types.h>
>  
>  #include <linux/nbd.h>
> +#include <linux/workqueue.h>
>  
>  struct nbd_device {
>  	u32 flags;
> @@ -69,6 +70,10 @@ struct nbd_device {
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>  	struct dentry *dbg_dir;
>  #endif
> +	/*
> +	 *This is specifically for calling sock_shutdown, for now.
> +	 */
> +	struct work_struct ws_shutdown;
>  };
>  
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
> @@ -95,6 +100,11 @@ static int max_part;
>   */
>  static DEFINE_SPINLOCK(nbd_lock);
>  
> +/*
> + * Shutdown function for nbd_dev work struct.
> + */
> +static void nbd_ws_func_shutdown(struct work_struct *);
> +
>  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
>  {
>  	return disk_to_dev(nbd->disk);
> @@ -172,39 +182,35 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
>   */
>  static void sock_shutdown(struct nbd_device *nbd)
>  {
> -	spin_lock_irq(&nbd->sock_lock);
> -
> -	if (!nbd->sock) {
> -		spin_unlock_irq(&nbd->sock_lock);
> -		return;
> -	}
> +	struct socket *sock;
>  
> -	dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
> -	kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
> -	sockfd_put(nbd->sock);
> +	spin_lock(&nbd->sock_lock);
> +	sock = nbd->sock;
>  	nbd->sock = NULL;
> -	spin_unlock_irq(&nbd->sock_lock);
> +	spin_unlock(&nbd->sock_lock);
> +
> +	if (!sock)
> +		return;
>  
>  	del_timer(&nbd->timeout_timer);
> +	dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
> +	kernel_sock_shutdown(sock, SHUT_RDWR);
> +	sockfd_put(sock);
>  }
>  
>  static void nbd_xmit_timeout(unsigned long arg)
>  {
>  	struct nbd_device *nbd = (struct nbd_device *)arg;
> -	unsigned long flags;
>  
>  	if (list_empty(&nbd->queue_head))
>  		return;
> -
> -	spin_lock_irqsave(&nbd->sock_lock, flags);
> -
>  	nbd->timedout = true;
> -
> -	if (nbd->sock)
> -		kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
> -
> -	spin_unlock_irqrestore(&nbd->sock_lock, flags);
> -
> +	schedule_work(&nbd->ws_shutdown);
> +	/*
> +	 * Make sure sender thread sees nbd->timedout.
> +	 */
> +	smp_wmb();

I am not sure that we need this memory barrier here. But as it is just
the timeout path it probably won't hurt.

> +	wake_up(&nbd->waiting_wq);
>  	dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
>  }
>  
> @@ -592,7 +598,11 @@ static int nbd_thread_send(void *data)
>  		spin_unlock_irq(&nbd->queue_lock);
>  
>  		/* handle request */
> -		nbd_handle_req(nbd, req);
> +		if (nbd->timedout) {
> +			req->errors++;
> +			nbd_end_request(nbd, req);
> +		} else
> +			nbd_handle_req(nbd, req);
>  	}
>  
>  	nbd->task_send = NULL;
> @@ -672,6 +682,7 @@ static void nbd_reset(struct nbd_device *nbd)
>  	set_capacity(nbd->disk, 0);
>  	nbd->flags = 0;
>  	nbd->xmit_timeout = 0;
> +	INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
>  	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
>  	del_timer_sync(&nbd->timeout_timer);
>  }
> @@ -804,15 +815,15 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>  		nbd_dev_dbg_close(nbd);
>  		kthread_stop(thread);
>  
> -		mutex_lock(&nbd->tx_lock);
> -
>  		sock_shutdown(nbd);
> +		mutex_lock(&nbd->tx_lock);
>  		nbd_clear_que(nbd);
>  		kill_bdev(bdev);
>  		nbd_bdev_reset(bdev);
>  
>  		if (nbd->disconnect) /* user requested, ignore socket errors */
>  			error = 0;
> +

Random newline here.

Best Regards,

Markus

>  		if (nbd->timedout)
>  			error = -ETIMEDOUT;
>  
> @@ -863,6 +874,14 @@ static const struct block_device_operations nbd_fops =
>  	.compat_ioctl =	nbd_ioctl,
>  };
>  
> +static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
> +{
> +	struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
> +							ws_shutdown);
> +
> +	sock_shutdown(nbd_dev);
> +}
> +
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>  
>  static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
> 

-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 4/5]nbd: make nbd device wait for its users.
  2016-06-02 10:25     ` [PATCH v2 4/5]nbd: make nbd device wait for its users Pranay Kr. Srivastava
@ 2016-06-14  8:59       ` Markus Pargmann
  2016-06-14  9:33         ` Pranay Srivastava
  0 siblings, 1 reply; 48+ messages in thread
From: Markus Pargmann @ 2016-06-14  8:59 UTC (permalink / raw)
  To: Pranay Kr. Srivastava; +Cc: nbd-general, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 5096 bytes --]

On Thursday 02 June 2016 13:25:00 Pranay Kr. Srivastava wrote:
> When a timeout occurs or a recv fails, then
> instead of abruplty killing nbd block device
> wait for it's users to finish.
> 
> This is more required when filesystem(s) like
> ext2 or ext3 don't expect their buffer heads to
> disappear while the filesystem is mounted.
> 
> Each open of a nbd device is refcounted, while
> the userland program [nbd-client] doing the
> NBD_DO_IT ioctl would now wait for any other users
> of this device before invalidating the nbd device.
> 
> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
> ---
>  drivers/block/nbd.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 58 insertions(+)
> 
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index d1d898d..4da40dc 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -70,10 +70,13 @@ struct nbd_device {
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>  	struct dentry *dbg_dir;
>  #endif
> +	atomic_t inuse;
>  	/*
>  	 *This is specifically for calling sock_shutdown, for now.
>  	 */
>  	struct work_struct ws_shutdown;
> +	struct kref users;
> +	struct completion user_completion;
>  };
>  
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
> @@ -104,6 +107,7 @@ static DEFINE_SPINLOCK(nbd_lock);
>   * Shutdown function for nbd_dev work struct.
>   */
>  static void nbd_ws_func_shutdown(struct work_struct *);
> +static void nbd_kref_release(struct kref *);
>  
>  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
>  {
> @@ -682,6 +686,8 @@ static void nbd_reset(struct nbd_device *nbd)
>  	nbd->flags = 0;
>  	nbd->xmit_timeout = 0;
>  	INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
> +	init_completion(&nbd->user_completion);
> +	kref_init(&nbd->users);
>  	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
>  	del_timer_sync(&nbd->timeout_timer);
>  }
> @@ -815,6 +821,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>  		kthread_stop(thread);
>  
>  		sock_shutdown(nbd);
> +		/*
> +		 * kref_init initializes with ref count as 1,
> +		 * nbd_client, or the user-land program executing
> +		 * this ioctl will make the refcount to 2[at least]
> +		 * so subtracting 2 from refcount.
> +		 */
> +		kref_sub(&nbd->users, 2, nbd_kref_release);

Why don't you use a kref_put?

> +		wait_for_completion(&nbd->user_completion);
>  		mutex_lock(&nbd->tx_lock);
>  		nbd_clear_que(nbd);
>  		kill_bdev(bdev);
> @@ -865,13 +879,56 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
>  
>  	return error;
>  }
> +static void nbd_kref_release(struct kref *kref_users)
> +{
> +	struct nbd_device *nbd = container_of(kref_users, struct nbd_device,
> +			users);

Not indented to opening bracket.

> +	pr_debug("Releasing kref [%s]\n", __func__);
> +	atomic_set(&nbd->inuse, 0);
> +	complete(&nbd->user_completion);
> +
> +}
> +
> +static int nbd_open(struct block_device *bdev, fmode_t mode)
> +{
> +	struct nbd_device *nbd_dev = bdev->bd_disk->private_data;
> +
> +	if (kref_get_unless_zero(&nbd_dev->users))
> +		atomic_set(&nbd_dev->inuse, 1);
> +
> +	pr_debug("Opening nbd_dev %s. Active users = %u\n",
> +			bdev->bd_disk->disk_name,
> +			atomic_read(&nbd_dev->users.refcount) - 1);

Indent to opening bracket.

> +	return 0;
> +}
> +
> +static void nbd_release(struct gendisk *disk, fmode_t mode)
> +{
> +	struct nbd_device *nbd_dev = disk->private_data;
> +	/*
> +	*kref_init initializes ref count to 1, so we
> +	*we check for refcount to be 2 for a final put.
> +	*
> +	*kref needs to be re-initialized just here as the
> +	*other process holding it must see the ref count as 2.
> +	*/
> +	if (atomic_read(&nbd_dev->inuse))
> +		kref_put(&nbd_dev->users,  nbd_kref_release);

What is this inuse atomic for? Everyone that releases the nbd device
will need to execute a kref_put().

Best Regards,

Markus

> +
> +	pr_debug("Closing nbd_dev %s. Active users = %u\n",
> +			disk->disk_name,
> +			atomic_read(&nbd_dev->users.refcount) - 1);
> +}
>  
>  static const struct block_device_operations nbd_fops = {
>  	.owner =	THIS_MODULE,
>  	.ioctl =	nbd_ioctl,
>  	.compat_ioctl =	nbd_ioctl,
> +	.open =		nbd_open,
> +	.release =	nbd_release
>  };
>  
> +
>  static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
>  {
>  	struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
> @@ -1107,6 +1164,7 @@ static int __init nbd_init(void)
>  		disk->fops = &nbd_fops;
>  		disk->private_data = &nbd_dev[i];
>  		sprintf(disk->disk_name, "nbd%d", i);
> +		atomic_set(&nbd_dev[i].inuse, 0);
>  		nbd_reset(&nbd_dev[i]);
>  		add_disk(disk);
>  	}
> 

-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 4/5]nbd: make nbd device wait for its users.
  2016-06-14  8:59       ` Markus Pargmann
@ 2016-06-14  9:33         ` Pranay Srivastava
  2016-06-15  6:30           ` Markus Pargmann
  0 siblings, 1 reply; 48+ messages in thread
From: Pranay Srivastava @ 2016-06-14  9:33 UTC (permalink / raw)
  To: Markus Pargmann; +Cc: nbd-general, linux-kernel

Hi Markus,

On Tue, Jun 14, 2016 at 2:29 PM, Markus Pargmann <mpa@pengutronix.de> wrote:
>
> On Thursday 02 June 2016 13:25:00 Pranay Kr. Srivastava wrote:
> > When a timeout occurs or a recv fails, then
> > instead of abruplty killing nbd block device
> > wait for it's users to finish.
> >
> > This is more required when filesystem(s) like
> > ext2 or ext3 don't expect their buffer heads to
> > disappear while the filesystem is mounted.
> >
> > Each open of a nbd device is refcounted, while
> > the userland program [nbd-client] doing the
> > NBD_DO_IT ioctl would now wait for any other users
> > of this device before invalidating the nbd device.
> >
> > Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
> > ---
> >  drivers/block/nbd.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 58 insertions(+)
> >
> > diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> > index d1d898d..4da40dc 100644
> > --- a/drivers/block/nbd.c
> > +++ b/drivers/block/nbd.c
> > @@ -70,10 +70,13 @@ struct nbd_device {
> >  #if IS_ENABLED(CONFIG_DEBUG_FS)
> >       struct dentry *dbg_dir;
> >  #endif
> > +     atomic_t inuse;
> >       /*
> >        *This is specifically for calling sock_shutdown, for now.
> >        */
> >       struct work_struct ws_shutdown;
> > +     struct kref users;
> > +     struct completion user_completion;
> >  };
> >
> >  #if IS_ENABLED(CONFIG_DEBUG_FS)
> > @@ -104,6 +107,7 @@ static DEFINE_SPINLOCK(nbd_lock);
> >   * Shutdown function for nbd_dev work struct.
> >   */
> >  static void nbd_ws_func_shutdown(struct work_struct *);
> > +static void nbd_kref_release(struct kref *);
> >
> >  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
> >  {
> > @@ -682,6 +686,8 @@ static void nbd_reset(struct nbd_device *nbd)
> >       nbd->flags = 0;
> >       nbd->xmit_timeout = 0;
> >       INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
> > +     init_completion(&nbd->user_completion);
> > +     kref_init(&nbd->users);
> >       queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
> >       del_timer_sync(&nbd->timeout_timer);
> >  }
> > @@ -815,6 +821,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
> >               kthread_stop(thread);
> >
> >               sock_shutdown(nbd);
> > +             /*
> > +              * kref_init initializes with ref count as 1,
> > +              * nbd_client, or the user-land program executing
> > +              * this ioctl will make the refcount to 2[at least]
> > +              * so subtracting 2 from refcount.
> > +              */
> > +             kref_sub(&nbd->users, 2, nbd_kref_release);
>
> Why don't you use a kref_put?

Ok, so I'll try to explain as I've understood the problem.

When the module is loaded the kref is initialized to 1.

Suppose now, someone has started nbd-client [nbdC-1] , then this
nbd-client will increase the ref count to 2. So far so good...

Now let's say this device is being shutdown via nbd-client[nbdC-2].

nbdC-1 will subtract the refcount by two, it has to do in NBD_DO_IT
since device file will not
be closed until after ioctl is over, and it'll wait_for_completion.

nbdC-2 now closes it's use of device file, this makes the refcount as
zero and completion
is triggered with nbdC-1 completed.

Now we don't want to trigger kref_put when nbdC-1 closes the device
file so kref_put needs
to be conditional in this regard so for that in_use is used.


>
> > +             wait_for_completion(&nbd->user_completion);
> >               mutex_lock(&nbd->tx_lock);
> >               nbd_clear_que(nbd);
> >               kill_bdev(bdev);
> > @@ -865,13 +879,56 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
> >
> >       return error;
> >  }
> > +static void nbd_kref_release(struct kref *kref_users)
> > +{
> > +     struct nbd_device *nbd = container_of(kref_users, struct nbd_device,
> > +                     users);
>
> Not indented to opening bracket.
>
> > +     pr_debug("Releasing kref [%s]\n", __func__);
> > +     atomic_set(&nbd->inuse, 0);
> > +     complete(&nbd->user_completion);
> > +
> > +}
> > +
> > +static int nbd_open(struct block_device *bdev, fmode_t mode)
> > +{
> > +     struct nbd_device *nbd_dev = bdev->bd_disk->private_data;
> > +
> > +     if (kref_get_unless_zero(&nbd_dev->users))
> > +             atomic_set(&nbd_dev->inuse, 1);
> > +
> > +     pr_debug("Opening nbd_dev %s. Active users = %u\n",
> > +                     bdev->bd_disk->disk_name,
> > +                     atomic_read(&nbd_dev->users.refcount) - 1);
>
> Indent to opening bracket.
>
> > +     return 0;
> > +}
> > +
> > +static void nbd_release(struct gendisk *disk, fmode_t mode)
> > +{
> > +     struct nbd_device *nbd_dev = disk->private_data;
> > +     /*
> > +     *kref_init initializes ref count to 1, so we
> > +     *we check for refcount to be 2 for a final put.
> > +     *
> > +     *kref needs to be re-initialized just here as the
> > +     *other process holding it must see the ref count as 2.
> > +     */
> > +     if (atomic_read(&nbd_dev->inuse))
> > +             kref_put(&nbd_dev->users,  nbd_kref_release);
>

> What is this inuse atomic for? Everyone that releases the nbd device
> will need to execute a kref_put().

To do away with inuse, perhaps we can do

kref_get just before leaving the NBD_DO_IT? so that when device file
is closed everyone
would do a kref_put? However there's a small race window while the
kref is being initialized,
and another process [not just nbd-client] is trying to open the device.

Do you think it's better to do this by introducing a spin_lock instead
of atomic?

Let me know if my understanding is correct.


>
> Best Regards,
>
> Markus
>
> > +
> > +     pr_debug("Closing nbd_dev %s. Active users = %u\n",
> > +                     disk->disk_name,
> > +                     atomic_read(&nbd_dev->users.refcount) - 1);
> > +}
> >
> >  static const struct block_device_operations nbd_fops = {
> >       .owner =        THIS_MODULE,
> >       .ioctl =        nbd_ioctl,
> >       .compat_ioctl = nbd_ioctl,
> > +     .open =         nbd_open,
> > +     .release =      nbd_release
> >  };
> >
> > +
> >  static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
> >  {
> >       struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
> > @@ -1107,6 +1164,7 @@ static int __init nbd_init(void)
> >               disk->fops = &nbd_fops;
> >               disk->private_data = &nbd_dev[i];
> >               sprintf(disk->disk_name, "nbd%d", i);
> > +             atomic_set(&nbd_dev[i].inuse, 0);
> >               nbd_reset(&nbd_dev[i]);
> >               add_disk(disk);
> >       }
> >
>
> --
> Pengutronix e.K.                           |                             |
> Industrial Linux Solutions                 | http://www.pengutronix.de/  |
> Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
> Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |




-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 1/5] nbd: fix might_sleep warning on socket shutdown.
  2016-06-14  8:52       ` Markus Pargmann
@ 2016-06-14  9:50         ` Pranay Srivastava
  2016-06-24 10:09         ` [PATCH v3 0/3] nbd: resolve bugs and limitations Pranay Kr. Srivastava
  1 sibling, 0 replies; 48+ messages in thread
From: Pranay Srivastava @ 2016-06-14  9:50 UTC (permalink / raw)
  To: Markus Pargmann; +Cc: nbd-general, linux-kernel

Hi

On Tue, Jun 14, 2016 at 2:22 PM, Markus Pargmann <mpa@pengutronix.de> wrote:
> Hi,
>
> On Thursday 02 June 2016 13:24:57 Pranay Kr. Srivastava wrote:
>> spinlocked ranges should be small and not contain calls into huge
>> subfunctions. Fix my mistake and just get the pointer to the socket
>> instead of doing everything with spinlock held.
>>
>> Reported-by: Mikulas Patocka <mikulas@twibright.com>
>> Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
>>
>> Changelog:
>> Pranay Kr. Srivastava<pranjas@gmail.com>:
>>
>> 1) Use spin_lock instead of irq version for sock_shutdown.
>>
>> 2) Use system work queue to actually trigger the shutdown of
>>    socket. This solves the issue when kernel_sendmsg is currently
>>    blocked while a timeout occurs.
>>
>> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
>
> This looks better. Some smaller things inline. Also this patch does not
> apply on my tree anymore. Can you please rebase it onto:
>         http://git.pengutronix.de/?p=mpa/linux-nbd.git;a=shortlog;h=refs/heads/master
>

Ok will do.

>> ---
>>  drivers/block/nbd.c | 65 ++++++++++++++++++++++++++++++++++-------------------
>>  1 file changed, 42 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
>> index 31e73a7..0339d40 100644
>> --- a/drivers/block/nbd.c
>> +++ b/drivers/block/nbd.c
>> @@ -39,6 +39,7 @@
>>  #include <asm/types.h>
>>
>>  #include <linux/nbd.h>
>> +#include <linux/workqueue.h>
>>
>>  struct nbd_device {
>>       u32 flags;
>> @@ -69,6 +70,10 @@ struct nbd_device {
>>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>>       struct dentry *dbg_dir;
>>  #endif
>> +     /*
>> +      *This is specifically for calling sock_shutdown, for now.
>> +      */
>> +     struct work_struct ws_shutdown;
>>  };
>>
>>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>> @@ -95,6 +100,11 @@ static int max_part;
>>   */
>>  static DEFINE_SPINLOCK(nbd_lock);
>>
>> +/*
>> + * Shutdown function for nbd_dev work struct.
>> + */
>> +static void nbd_ws_func_shutdown(struct work_struct *);
>> +
>>  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
>>  {
>>       return disk_to_dev(nbd->disk);
>> @@ -172,39 +182,35 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
>>   */
>>  static void sock_shutdown(struct nbd_device *nbd)
>>  {
>> -     spin_lock_irq(&nbd->sock_lock);
>> -
>> -     if (!nbd->sock) {
>> -             spin_unlock_irq(&nbd->sock_lock);
>> -             return;
>> -     }
>> +     struct socket *sock;
>>
>> -     dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
>> -     kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
>> -     sockfd_put(nbd->sock);
>> +     spin_lock(&nbd->sock_lock);
>> +     sock = nbd->sock;
>>       nbd->sock = NULL;
>> -     spin_unlock_irq(&nbd->sock_lock);
>> +     spin_unlock(&nbd->sock_lock);
>> +
>> +     if (!sock)
>> +             return;
>>
>>       del_timer(&nbd->timeout_timer);
>> +     dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
>> +     kernel_sock_shutdown(sock, SHUT_RDWR);
>> +     sockfd_put(sock);
>>  }
>>
>>  static void nbd_xmit_timeout(unsigned long arg)
>>  {
>>       struct nbd_device *nbd = (struct nbd_device *)arg;
>> -     unsigned long flags;
>>
>>       if (list_empty(&nbd->queue_head))
>>               return;
>> -
>> -     spin_lock_irqsave(&nbd->sock_lock, flags);
>> -
>>       nbd->timedout = true;
>> -
>> -     if (nbd->sock)
>> -             kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
>> -
>> -     spin_unlock_irqrestore(&nbd->sock_lock, flags);
>> -
>> +     schedule_work(&nbd->ws_shutdown);
>> +     /*
>> +      * Make sure sender thread sees nbd->timedout.
>> +      */
>> +     smp_wmb();
>
> I am not sure that we need this memory barrier here. But as it is just
> the timeout path it probably won't hurt.
>
>> +     wake_up(&nbd->waiting_wq);
>>       dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
>>  }
>>
>> @@ -592,7 +598,11 @@ static int nbd_thread_send(void *data)
>>               spin_unlock_irq(&nbd->queue_lock);
>>
>>               /* handle request */
>> -             nbd_handle_req(nbd, req);
>> +             if (nbd->timedout) {
>> +                     req->errors++;
>> +                     nbd_end_request(nbd, req);
>> +             } else
>> +                     nbd_handle_req(nbd, req);
>>       }
>>
>>       nbd->task_send = NULL;
>> @@ -672,6 +682,7 @@ static void nbd_reset(struct nbd_device *nbd)
>>       set_capacity(nbd->disk, 0);
>>       nbd->flags = 0;
>>       nbd->xmit_timeout = 0;
>> +     INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
>>       queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
>>       del_timer_sync(&nbd->timeout_timer);
>>  }
>> @@ -804,15 +815,15 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>>               nbd_dev_dbg_close(nbd);
>>               kthread_stop(thread);
>>
>> -             mutex_lock(&nbd->tx_lock);
>> -
>>               sock_shutdown(nbd);
>> +             mutex_lock(&nbd->tx_lock);
>>               nbd_clear_que(nbd);
>>               kill_bdev(bdev);
>>               nbd_bdev_reset(bdev);
>>
>>               if (nbd->disconnect) /* user requested, ignore socket errors */
>>                       error = 0;
>> +
>
> Random newline here.
Ok, will take care of this.

>
> Best Regards,
>
> Markus
>
>>               if (nbd->timedout)
>>                       error = -ETIMEDOUT;
>>
>> @@ -863,6 +874,14 @@ static const struct block_device_operations nbd_fops =
>>       .compat_ioctl = nbd_ioctl,
>>  };
>>
>> +static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
>> +{
>> +     struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
>> +                                                     ws_shutdown);
>> +
>> +     sock_shutdown(nbd_dev);
>> +}
>> +
>>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>>
>>  static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
>>
>
> --
> Pengutronix e.K.                           |                             |
> Industrial Linux Solutions                 | http://www.pengutronix.de/  |
> Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
> Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |



-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 4/5]nbd: make nbd device wait for its users.
  2016-06-14  9:33         ` Pranay Srivastava
@ 2016-06-15  6:30           ` Markus Pargmann
  2016-06-15  7:00             ` [Nbd] " Wouter Verhelst
  2016-06-15  9:17             ` Pranay Srivastava
  0 siblings, 2 replies; 48+ messages in thread
From: Markus Pargmann @ 2016-06-15  6:30 UTC (permalink / raw)
  To: Pranay Srivastava; +Cc: nbd-general, linux-kernel, Wouter Verhelst

[-- Attachment #1: Type: text/plain, Size: 8902 bytes --]

Hi Pranay,

On Tuesday 14 June 2016 15:03:40 Pranay Srivastava wrote:
> Hi Markus,
> 
> On Tue, Jun 14, 2016 at 2:29 PM, Markus Pargmann <mpa@pengutronix.de> wrote:
> >
> > On Thursday 02 June 2016 13:25:00 Pranay Kr. Srivastava wrote:
> > > When a timeout occurs or a recv fails, then
> > > instead of abruplty killing nbd block device
> > > wait for it's users to finish.
> > >
> > > This is more required when filesystem(s) like
> > > ext2 or ext3 don't expect their buffer heads to
> > > disappear while the filesystem is mounted.
> > >
> > > Each open of a nbd device is refcounted, while
> > > the userland program [nbd-client] doing the
> > > NBD_DO_IT ioctl would now wait for any other users
> > > of this device before invalidating the nbd device.
> > >
> > > Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
> > > ---
> > >  drivers/block/nbd.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 58 insertions(+)
> > >
> > > diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> > > index d1d898d..4da40dc 100644
> > > --- a/drivers/block/nbd.c
> > > +++ b/drivers/block/nbd.c
> > > @@ -70,10 +70,13 @@ struct nbd_device {
> > >  #if IS_ENABLED(CONFIG_DEBUG_FS)
> > >       struct dentry *dbg_dir;
> > >  #endif
> > > +     atomic_t inuse;
> > >       /*
> > >        *This is specifically for calling sock_shutdown, for now.
> > >        */
> > >       struct work_struct ws_shutdown;
> > > +     struct kref users;
> > > +     struct completion user_completion;
> > >  };
> > >
> > >  #if IS_ENABLED(CONFIG_DEBUG_FS)
> > > @@ -104,6 +107,7 @@ static DEFINE_SPINLOCK(nbd_lock);
> > >   * Shutdown function for nbd_dev work struct.
> > >   */
> > >  static void nbd_ws_func_shutdown(struct work_struct *);
> > > +static void nbd_kref_release(struct kref *);
> > >
> > >  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
> > >  {
> > > @@ -682,6 +686,8 @@ static void nbd_reset(struct nbd_device *nbd)
> > >       nbd->flags = 0;
> > >       nbd->xmit_timeout = 0;
> > >       INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
> > > +     init_completion(&nbd->user_completion);
> > > +     kref_init(&nbd->users);
> > >       queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
> > >       del_timer_sync(&nbd->timeout_timer);
> > >  }
> > > @@ -815,6 +821,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
> > >               kthread_stop(thread);
> > >
> > >               sock_shutdown(nbd);
> > > +             /*
> > > +              * kref_init initializes with ref count as 1,
> > > +              * nbd_client, or the user-land program executing
> > > +              * this ioctl will make the refcount to 2[at least]
> > > +              * so subtracting 2 from refcount.
> > > +              */
> > > +             kref_sub(&nbd->users, 2, nbd_kref_release);
> >
> > Why don't you use a kref_put?
> 
> Ok, so I'll try to explain as I've understood the problem.
> 
> When the module is loaded the kref is initialized to 1.
> 
> Suppose now, someone has started nbd-client [nbdC-1] , then this
> nbd-client will increase the ref count to 2. So far so good...
> 
> Now let's say this device is being shutdown via nbd-client[nbdC-2].
> 
> nbdC-1 will subtract the refcount by two, it has to do in NBD_DO_IT
> since device file will not
> be closed until after ioctl is over, and it'll wait_for_completion.
> 
> nbdC-2 now closes it's use of device file, this makes the refcount as
> zero and completion
> is triggered with nbdC-1 completed.
> 
> Now we don't want to trigger kref_put when nbdC-1 closes the device
> file so kref_put needs
> to be conditional in this regard so for that in_use is used.
> 
> 
> >
> > > +             wait_for_completion(&nbd->user_completion);
> > >               mutex_lock(&nbd->tx_lock);
> > >               nbd_clear_que(nbd);
> > >               kill_bdev(bdev);
> > > @@ -865,13 +879,56 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
> > >
> > >       return error;
> > >  }
> > > +static void nbd_kref_release(struct kref *kref_users)
> > > +{
> > > +     struct nbd_device *nbd = container_of(kref_users, struct nbd_device,
> > > +                     users);
> >
> > Not indented to opening bracket.
> >
> > > +     pr_debug("Releasing kref [%s]\n", __func__);
> > > +     atomic_set(&nbd->inuse, 0);
> > > +     complete(&nbd->user_completion);
> > > +
> > > +}
> > > +
> > > +static int nbd_open(struct block_device *bdev, fmode_t mode)
> > > +{
> > > +     struct nbd_device *nbd_dev = bdev->bd_disk->private_data;
> > > +
> > > +     if (kref_get_unless_zero(&nbd_dev->users))
> > > +             atomic_set(&nbd_dev->inuse, 1);
> > > +
> > > +     pr_debug("Opening nbd_dev %s. Active users = %u\n",
> > > +                     bdev->bd_disk->disk_name,
> > > +                     atomic_read(&nbd_dev->users.refcount) - 1);
> >
> > Indent to opening bracket.
> >
> > > +     return 0;
> > > +}
> > > +
> > > +static void nbd_release(struct gendisk *disk, fmode_t mode)
> > > +{
> > > +     struct nbd_device *nbd_dev = disk->private_data;
> > > +     /*
> > > +     *kref_init initializes ref count to 1, so we
> > > +     *we check for refcount to be 2 for a final put.
> > > +     *
> > > +     *kref needs to be re-initialized just here as the
> > > +     *other process holding it must see the ref count as 2.
> > > +     */
> > > +     if (atomic_read(&nbd_dev->inuse))
> > > +             kref_put(&nbd_dev->users,  nbd_kref_release);
> >
> 
> > What is this inuse atomic for? Everyone that releases the nbd device
> > will need to execute a kref_put().
> 
> To do away with inuse, perhaps we can do
> 
> kref_get just before leaving the NBD_DO_IT? so that when device file
> is closed everyone
> would do a kref_put? However there's a small race window while the
> kref is being initialized,
> and another process [not just nbd-client] is trying to open the device.
> 
> Do you think it's better to do this by introducing a spin_lock instead
> of atomic?
> 
> Let me know if my understanding is correct.

Thanks for the explanations. I think my understanding was off by one ;).
I didn't realize that the DO_IT thread from the userspace has the block
device open as well.

I thought a bit about this, does it make sense to delay the essential
cleanup steps until really all open file handles were closed? So that
even if the DO_IT thread exits, the block device is still there. Only if
the file is closed everything is cleaned up. Maybe this makes the code
simpler and we can directly use krefs without any strange constructs.
What do you think?

This would also allow the client to setup a new socket as long as it
does not close the nbd file handle.

Could this behavior be potentially problematic for any client
implementation? Does it solve our other issue with setting up a new
sockets for an existing nbd blockdevice?

Cc Wouter

Best Regards,

Markus

> 
> 
> >
> > Best Regards,
> >
> > Markus
> >
> > > +
> > > +     pr_debug("Closing nbd_dev %s. Active users = %u\n",
> > > +                     disk->disk_name,
> > > +                     atomic_read(&nbd_dev->users.refcount) - 1);
> > > +}
> > >
> > >  static const struct block_device_operations nbd_fops = {
> > >       .owner =        THIS_MODULE,
> > >       .ioctl =        nbd_ioctl,
> > >       .compat_ioctl = nbd_ioctl,
> > > +     .open =         nbd_open,
> > > +     .release =      nbd_release
> > >  };
> > >
> > > +
> > >  static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
> > >  {
> > >       struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
> > > @@ -1107,6 +1164,7 @@ static int __init nbd_init(void)
> > >               disk->fops = &nbd_fops;
> > >               disk->private_data = &nbd_dev[i];
> > >               sprintf(disk->disk_name, "nbd%d", i);
> > > +             atomic_set(&nbd_dev[i].inuse, 0);
> > >               nbd_reset(&nbd_dev[i]);
> > >               add_disk(disk);
> > >       }
> > >
> >
> > --
> > Pengutronix e.K.                           |                             |
> > Industrial Linux Solutions                 | http://www.pengutronix.de/  |
> > Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
> > Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |
> 
> 
> 
> 
> 

-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Nbd] [PATCH v2 4/5]nbd: make nbd device wait for its users.
  2016-06-15  6:30           ` Markus Pargmann
@ 2016-06-15  7:00             ` Wouter Verhelst
  2016-06-15  9:18               ` Pranay Srivastava
  2016-06-15  9:17             ` Pranay Srivastava
  1 sibling, 1 reply; 48+ messages in thread
From: Wouter Verhelst @ 2016-06-15  7:00 UTC (permalink / raw)
  To: Markus Pargmann; +Cc: Pranay Srivastava, nbd-general, linux-kernel

On Wed, Jun 15, 2016 at 08:30:45AM +0200, Markus Pargmann wrote:
> Thanks for the explanations. I think my understanding was off by one ;)..
> I didn't realize that the DO_IT thread from the userspace has the block
> device open as well.

Obviously, otherwise it couldn't do an ioctl() to it :-)

> I thought a bit about this, does it make sense to delay the essential
> cleanup steps until really all open file handles were closed? So that
> even if the DO_IT thread exits, the block device is still there. Only if
> the file is closed everything is cleaned up. Maybe this makes the code
> simpler and we can directly use krefs without any strange constructs.
> What do you think?
> 
> This would also allow the client to setup a new socket as long as it
> does not close the nbd file handle.

That sounds like the behaviour that I described earlier about possible
retries for userspace...

> Could this behavior be potentially problematic for any client
> implementation?

I don't think it could, but I'm not sure I understand all the details.
What would happen if:

- nbd is connected from pid X, pid Y does NBD_DISCONNECT, pid X hangs
  and doesn't exit?
- nbd is connected from pid X, server disconnects while pid Y is trying
  to access the device, pid X tries to reconnect but it takes a while?

> Does it solve our other issue with setting up a new sockets for an
> existing nbd blockdevice?

It could, depending.

-- 
< ron> I mean, the main *practical* problem with C++, is there's like a dozen
       people in the world who think they really understand all of its rules,
       and pretty much all of them are just lying to themselves too.
 -- #debian-devel, OFTC, 2016-02-12

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 4/5]nbd: make nbd device wait for its users.
  2016-06-15  6:30           ` Markus Pargmann
  2016-06-15  7:00             ` [Nbd] " Wouter Verhelst
@ 2016-06-15  9:17             ` Pranay Srivastava
  2016-06-24  9:29               ` [PATCH 1/2] nbd: " Markus Pargmann
  1 sibling, 1 reply; 48+ messages in thread
From: Pranay Srivastava @ 2016-06-15  9:17 UTC (permalink / raw)
  To: Markus Pargmann; +Cc: nbd-general, linux-kernel, Wouter Verhelst

Hey Markus,

On Wed, Jun 15, 2016 at 12:00 PM, Markus Pargmann <mpa@pengutronix.de> wrote:
> Hi Pranay,
>
> On Tuesday 14 June 2016 15:03:40 Pranay Srivastava wrote:
>> Hi Markus,
>>
>> On Tue, Jun 14, 2016 at 2:29 PM, Markus Pargmann <mpa@pengutronix.de> wrote:
>> >
>> > On Thursday 02 June 2016 13:25:00 Pranay Kr. Srivastava wrote:
>> > > When a timeout occurs or a recv fails, then
>> > > instead of abruplty killing nbd block device
>> > > wait for it's users to finish.
>> > >
>> > > This is more required when filesystem(s) like
>> > > ext2 or ext3 don't expect their buffer heads to
>> > > disappear while the filesystem is mounted.
>> > >
>> > > Each open of a nbd device is refcounted, while
>> > > the userland program [nbd-client] doing the
>> > > NBD_DO_IT ioctl would now wait for any other users
>> > > of this device before invalidating the nbd device.
>> > >
>> > > Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
>> > > ---
>> > >  drivers/block/nbd.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>> > >  1 file changed, 58 insertions(+)
>> > >
>> > > diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
>> > > index d1d898d..4da40dc 100644
>> > > --- a/drivers/block/nbd.c
>> > > +++ b/drivers/block/nbd.c
>> > > @@ -70,10 +70,13 @@ struct nbd_device {
>> > >  #if IS_ENABLED(CONFIG_DEBUG_FS)
>> > >       struct dentry *dbg_dir;
>> > >  #endif
>> > > +     atomic_t inuse;
>> > >       /*
>> > >        *This is specifically for calling sock_shutdown, for now.
>> > >        */
>> > >       struct work_struct ws_shutdown;
>> > > +     struct kref users;
>> > > +     struct completion user_completion;
>> > >  };
>> > >
>> > >  #if IS_ENABLED(CONFIG_DEBUG_FS)
>> > > @@ -104,6 +107,7 @@ static DEFINE_SPINLOCK(nbd_lock);
>> > >   * Shutdown function for nbd_dev work struct.
>> > >   */
>> > >  static void nbd_ws_func_shutdown(struct work_struct *);
>> > > +static void nbd_kref_release(struct kref *);
>> > >
>> > >  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
>> > >  {
>> > > @@ -682,6 +686,8 @@ static void nbd_reset(struct nbd_device *nbd)
>> > >       nbd->flags = 0;
>> > >       nbd->xmit_timeout = 0;
>> > >       INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
>> > > +     init_completion(&nbd->user_completion);
>> > > +     kref_init(&nbd->users);
>> > >       queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
>> > >       del_timer_sync(&nbd->timeout_timer);
>> > >  }
>> > > @@ -815,6 +821,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>> > >               kthread_stop(thread);
>> > >
>> > >               sock_shutdown(nbd);
>> > > +             /*
>> > > +              * kref_init initializes with ref count as 1,
>> > > +              * nbd_client, or the user-land program executing
>> > > +              * this ioctl will make the refcount to 2[at least]
>> > > +              * so subtracting 2 from refcount.
>> > > +              */
>> > > +             kref_sub(&nbd->users, 2, nbd_kref_release);
>> >
>> > Why don't you use a kref_put?
>>
>> Ok, so I'll try to explain as I've understood the problem.
>>
>> When the module is loaded the kref is initialized to 1.
>>
>> Suppose now, someone has started nbd-client [nbdC-1] , then this
>> nbd-client will increase the ref count to 2. So far so good...
>>
>> Now let's say this device is being shutdown via nbd-client[nbdC-2].
>>
>> nbdC-1 will subtract the refcount by two, it has to do in NBD_DO_IT
>> since device file will not
>> be closed until after ioctl is over, and it'll wait_for_completion.
>>
>> nbdC-2 now closes it's use of device file, this makes the refcount as
>> zero and completion
>> is triggered with nbdC-1 completed.
>>
>> Now we don't want to trigger kref_put when nbdC-1 closes the device
>> file so kref_put needs
>> to be conditional in this regard so for that in_use is used.
>>
>>
>> >
>> > > +             wait_for_completion(&nbd->user_completion);
>> > >               mutex_lock(&nbd->tx_lock);
>> > >               nbd_clear_que(nbd);
>> > >               kill_bdev(bdev);
>> > > @@ -865,13 +879,56 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
>> > >
>> > >       return error;
>> > >  }
>> > > +static void nbd_kref_release(struct kref *kref_users)
>> > > +{
>> > > +     struct nbd_device *nbd = container_of(kref_users, struct nbd_device,
>> > > +                     users);
>> >
>> > Not indented to opening bracket.
>> >
>> > > +     pr_debug("Releasing kref [%s]\n", __func__);
>> > > +     atomic_set(&nbd->inuse, 0);
>> > > +     complete(&nbd->user_completion);
>> > > +
>> > > +}
>> > > +
>> > > +static int nbd_open(struct block_device *bdev, fmode_t mode)
>> > > +{
>> > > +     struct nbd_device *nbd_dev = bdev->bd_disk->private_data;
>> > > +
>> > > +     if (kref_get_unless_zero(&nbd_dev->users))
>> > > +             atomic_set(&nbd_dev->inuse, 1);
>> > > +
>> > > +     pr_debug("Opening nbd_dev %s. Active users = %u\n",
>> > > +                     bdev->bd_disk->disk_name,
>> > > +                     atomic_read(&nbd_dev->users.refcount) - 1);
>> >
>> > Indent to opening bracket.
>> >
>> > > +     return 0;
>> > > +}
>> > > +
>> > > +static void nbd_release(struct gendisk *disk, fmode_t mode)
>> > > +{
>> > > +     struct nbd_device *nbd_dev = disk->private_data;
>> > > +     /*
>> > > +     *kref_init initializes ref count to 1, so we
>> > > +     *we check for refcount to be 2 for a final put.
>> > > +     *
>> > > +     *kref needs to be re-initialized just here as the
>> > > +     *other process holding it must see the ref count as 2.
>> > > +     */
>> > > +     if (atomic_read(&nbd_dev->inuse))
>> > > +             kref_put(&nbd_dev->users,  nbd_kref_release);
>> >
>>
>> > What is this inuse atomic for? Everyone that releases the nbd device
>> > will need to execute a kref_put().
>>
>> To do away with inuse, perhaps we can do
>>
>> kref_get just before leaving the NBD_DO_IT? so that when device file
>> is closed everyone
>> would do a kref_put? However there's a small race window while the
>> kref is being initialized,
>> and another process [not just nbd-client] is trying to open the device.
>>
>> Do you think it's better to do this by introducing a spin_lock instead
>> of atomic?
>>
>> Let me know if my understanding is correct.
>
> Thanks for the explanations. I think my understanding was off by one ;).
> I didn't realize that the DO_IT thread from the userspace has the block
> device open as well.
>
> I thought a bit about this, does it make sense to delay the essential
> cleanup steps until really all open file handles were closed? So that
> even if the DO_IT thread exits, the block device is still there. Only if
> the file is closed everything is cleaned up. Maybe this makes the code
> simpler and we can directly use krefs without any strange constructs.
> What do you think?
>

I chose open/close as that is the common interface to all processes that need
to use nbd device and not just nbd-client.

Let's take example of a mount, so some user has just done a mount of this device
and right now it's not doing anything. So someone issues an NBD_DISCONNECT
and wham... The idea is to propagate errors to user space correctly.

So the solution I've proposed says, if there's someone
apart_from_nbd-client (which
is currently waiting for NBD_DO_IT to complete) is
using this device then nbd-client should honor that and shouldn't
allow this device
to be reused until after all such processes have left this device.


> This would also allow the client to setup a new socket as long as it
> does not close the nbd file handle.

I think that is still possible right, which is why there's a kref_sub
of 2, so the wait
is only for the "other processes" using this device and _not_ this
nbd-client whose
NBD_DO_IT just got over.

Now I'm just concerned over the processes which are trying to use this
nbd device
but the re-initialzation code at the end of NBD_DO_IT hasn't been done. So it's
possible some device may skip getting a kref, due to kref_get_unless_zero.

Actually I wanted to put this kref under bd_mutex to avoid such races and it'll
always be a call to kref_get in open and kref_put in close. Still
kref_sub(2) would
be required :-)

>
> Could this behavior be potentially problematic for any client
> implementation? Does it solve our other issue with setting up a new
> sockets for an existing nbd blockdevice?

I don't think that would be a problem. sock_shutdown is before the wait
right? So in error condition the worker thread would close it and set to null
while for a normal case, sock_shutdown in NBD_DO_IT would set that.
So it should be OK.

>
> Cc Wouter
>
> Best Regards,
>
> Markus
>
>>
>>
>> >
>> > Best Regards,
>> >
>> > Markus
>> >
>> > > +
>> > > +     pr_debug("Closing nbd_dev %s. Active users = %u\n",
>> > > +                     disk->disk_name,
>> > > +                     atomic_read(&nbd_dev->users.refcount) - 1);
>> > > +}
>> > >
>> > >  static const struct block_device_operations nbd_fops = {
>> > >       .owner =        THIS_MODULE,
>> > >       .ioctl =        nbd_ioctl,
>> > >       .compat_ioctl = nbd_ioctl,
>> > > +     .open =         nbd_open,
>> > > +     .release =      nbd_release
>> > >  };
>> > >
>> > > +
>> > >  static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
>> > >  {
>> > >       struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
>> > > @@ -1107,6 +1164,7 @@ static int __init nbd_init(void)
>> > >               disk->fops = &nbd_fops;
>> > >               disk->private_data = &nbd_dev[i];
>> > >               sprintf(disk->disk_name, "nbd%d", i);
>> > > +             atomic_set(&nbd_dev[i].inuse, 0);
>> > >               nbd_reset(&nbd_dev[i]);
>> > >               add_disk(disk);
>> > >       }
>> > >
>> >
>> > --
>> > Pengutronix e.K.                           |                             |
>> > Industrial Linux Solutions                 | http://www.pengutronix.de/  |
>> > Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
>> > Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |
>>
>>
>>
>>
>>
>
> --
> Pengutronix e.K.                           |                             |
> Industrial Linux Solutions                 | http://www.pengutronix.de/  |
> Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
> Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |



-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Nbd] [PATCH v2 4/5]nbd: make nbd device wait for its users.
  2016-06-15  7:00             ` [Nbd] " Wouter Verhelst
@ 2016-06-15  9:18               ` Pranay Srivastava
  0 siblings, 0 replies; 48+ messages in thread
From: Pranay Srivastava @ 2016-06-15  9:18 UTC (permalink / raw)
  To: Wouter Verhelst; +Cc: Markus Pargmann, nbd-general, linux-kernel

On Wed, Jun 15, 2016 at 12:30 PM, Wouter Verhelst <w@uter.be> wrote:
> On Wed, Jun 15, 2016 at 08:30:45AM +0200, Markus Pargmann wrote:
>> Thanks for the explanations. I think my understanding was off by one ;)..
>> I didn't realize that the DO_IT thread from the userspace has the block
>> device open as well.
>
> Obviously, otherwise it couldn't do an ioctl() to it :-)
>
>> I thought a bit about this, does it make sense to delay the essential
>> cleanup steps until really all open file handles were closed? So that
>> even if the DO_IT thread exits, the block device is still there. Only if
>> the file is closed everything is cleaned up. Maybe this makes the code
>> simpler and we can directly use krefs without any strange constructs.
>> What do you think?


>>
>> This would also allow the client to setup a new socket as long as it
>> does not close the nbd file handle.
>
> That sounds like the behaviour that I described earlier about possible
> retries for userspace...
>
>> Could this behavior be potentially problematic for any client
>> implementation?
>
> I don't think it could, but I'm not sure I understand all the details.
> What would happen if:
>
> - nbd is connected from pid X, pid Y does NBD_DISCONNECT, pid X hangs
>   and doesn't exit?

In that case pid X would have an error on recv if I'am correct. Then if no other
users[like mounts or other user space applications] are present for this device
then pidx would exit.

> - nbd is connected from pid X, server disconnects while pid Y is trying
>   to access the device, pid X tries to reconnect but it takes a while?

Not sure what issue you see in the above case but if I understand correctly,

This should again fall in error case [Correct?] as a result of a timeout may be
if any requests were in progress. I don't think reconnect will be
without any error
thrown up user space, not so sure if both sides [server and client]
were sitting idle.
OR
if no timeout was there then things should go OK after a successful reconnect.

Multiple processes can still access the device.

>
>> Does it solve our other issue with setting up a new sockets for an
>> existing nbd blockdevice?
>
> It could, depending.

This should be OK.

>
> --
> < ron> I mean, the main *practical* problem with C++, is there's like a dozen
>        people in the world who think they really understand all of its rules,
>        and pretty much all of them are just lying to themselves too.
>  -- #debian-devel, OFTC, 2016-02-12



-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 1/2] nbd: make nbd device wait for its users
  2016-06-15  9:17             ` Pranay Srivastava
@ 2016-06-24  9:29               ` Markus Pargmann
  2016-06-24  9:29                 ` [PATCH 2/2] nbd: Disallow ioctls on disconnected block device Markus Pargmann
                                   ` (3 more replies)
  0 siblings, 4 replies; 48+ messages in thread
From: Markus Pargmann @ 2016-06-24  9:29 UTC (permalink / raw)
  To: Pranay Kr. Srivastava
  Cc: nbd-general, linux-kernel, Wouter Verhelst, Markus Pargmann

From: "Pranay Kr. Srivastava" <pranjas@gmail.com>

When a timeout occurs or a recv fails, then instead of abruplty killing
nbd block device wait for it's users to finish.

This is more required when filesystem(s) like ext2 or ext3 don't expect
their buffer heads to disappear while the filesystem is mounted.

Each open is counted. The blockdevice is kept open until the last user
closes the block device. This offers the possibility as well to open a
new socket to be used while the filesystems are mounted.

Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>

[mpa: Keep the blockdevice open until all users left]
Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
---
Hi,

I used your patch and changed it a bit based on my ideas. The general
difference is that this keeps the block device open. After all users left, the
device is reset.

The followup patch then restricts access to ioctls after a disconnect. I wanted
to avoid that anyone sets up anything new without all the old users leaving.

Please let me know what you think about this.

Best Regards,

Markus

 drivers/block/nbd.c | 62 ++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 17 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 1efc26bf1d21..620660f3ff0f 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -69,6 +69,8 @@ struct nbd_device {
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	struct dentry *dbg_dir;
 #endif
+	atomic_t users; /* Users that opened the block device */
+	struct block_device *bdev;
 };
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -655,9 +657,26 @@ static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
 	return ret;
 }
 
+static void nbd_bdev_reset(struct block_device *bdev)
+{
+	set_device_ro(bdev, false);
+	bdev->bd_inode->i_size = 0;
+	if (max_part > 0) {
+		blkdev_reread_part(bdev);
+		bdev->bd_invalidated = 1;
+	}
+}
+
 /* Reset all properties of an NBD device */
 static void nbd_reset(struct nbd_device *nbd)
 {
+	sock_shutdown(nbd);
+	nbd_clear_que(nbd);
+	if (nbd->bdev) {
+		kill_bdev(nbd->bdev);
+		nbd_bdev_reset(nbd->bdev);
+	}
+
 	nbd->disconnect = false;
 	nbd->timedout = false;
 	nbd->blksize = 1024;
@@ -669,16 +688,6 @@ static void nbd_reset(struct nbd_device *nbd)
 	del_timer_sync(&nbd->timeout_timer);
 }
 
-static void nbd_bdev_reset(struct block_device *bdev)
-{
-	set_device_ro(bdev, false);
-	bdev->bd_inode->i_size = 0;
-	if (max_part > 0) {
-		blkdev_reread_part(bdev);
-		bdev->bd_invalidated = 1;
-	}
-}
-
 static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
 {
 	if (nbd->flags & NBD_FLAG_READ_ONLY)
@@ -803,18 +812,11 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		mutex_lock(&nbd->tx_lock);
 		nbd->task_recv = NULL;
 
-		sock_shutdown(nbd);
-		nbd_clear_que(nbd);
-		kill_bdev(bdev);
-		nbd_bdev_reset(bdev);
-
 		if (nbd->disconnect) /* user requested, ignore socket errors */
 			error = 0;
 		if (nbd->timedout)
 			error = -ETIMEDOUT;
 
-		nbd_reset(nbd);
-
 		return error;
 	}
 
@@ -853,10 +855,35 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
 	return error;
 }
 
+static int nbd_open(struct block_device *bdev, fmode_t mode)
+{
+	struct nbd_device *nbd = bdev->bd_disk->private_data;
+
+	atomic_inc(&nbd->users);
+
+	if (!nbd->bdev)
+		nbd->bdev = bdev;
+
+	return 0;
+}
+
+static void nbd_release(struct gendisk *disk, fmode_t mode)
+{
+	struct nbd_device *nbd = disk->private_data;
+
+	if (atomic_dec_and_test(&nbd->users)) {
+		mutex_lock(&nbd->tx_lock);
+		nbd_reset(nbd);
+		mutex_unlock(&nbd->tx_lock);
+	}
+}
+
 static const struct block_device_operations nbd_fops = {
 	.owner =	THIS_MODULE,
 	.ioctl =	nbd_ioctl,
 	.compat_ioctl =	nbd_ioctl,
+	.open =		nbd_open,
+	.release =	nbd_release,
 };
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -1087,6 +1114,7 @@ static int __init nbd_init(void)
 		disk->private_data = &nbd_dev[i];
 		sprintf(disk->disk_name, "nbd%d", i);
 		nbd_reset(&nbd_dev[i]);
+		atomic_set(&nbd_dev[i].users, 0);
 		add_disk(disk);
 	}
 
-- 
2.1.4

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 2/2] nbd: Disallow ioctls on disconnected block device
  2016-06-24  9:29               ` [PATCH 1/2] nbd: " Markus Pargmann
@ 2016-06-24  9:29                 ` Markus Pargmann
  2016-07-16  7:42                   ` Pranay Srivastava
  2016-06-24  9:39                 ` [PATCH 1/2] nbd: make nbd device wait for its users Pranay Srivastava
                                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 48+ messages in thread
From: Markus Pargmann @ 2016-06-24  9:29 UTC (permalink / raw)
  To: Pranay Kr. Srivastava
  Cc: nbd-general, linux-kernel, Wouter Verhelst, Markus Pargmann

After NBD_DO_IT exited the block device may still be used. Make sure
that we handle intended disconnects differently and do not allow any
changed of the nbd device.

This patch should avoid that the nbd-client connects to a different server
and the users of the block device are suddenly reading/writing from a
different backend device.

For timeouts it is still possible to setup a new socket so that the
connection may be refreshed without creating problems for all users.

Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
---
 drivers/block/nbd.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 620660f3ff0f..39358efac73e 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -708,6 +708,18 @@ static void nbd_dev_dbg_close(struct nbd_device *nbd);
 static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		       unsigned int cmd, unsigned long arg)
 {
+	/*
+	 * After a disconnect was instructed, do not allow any further actions
+	 * on the block device that would lead to a new connected endpoint.
+	 * This condition stays until nbd_reset was called either because all
+	 * users closed the device or because of CLEAR_SOCK.
+	 */
+	if (nbd->disconnect &&
+	    cmd != NBD_CLEAR_SOCK && cmd != NBD_PRINT_DEBUG) {
+		dev_info(disk_to_dev(nbd->disk), "Device is still busy after instructing a disconnect\n");
+		return -EBUSY;
+	}
+
 	switch (cmd) {
 	case NBD_DISCONNECT: {
 		struct request sreq;
@@ -733,11 +745,15 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 	}
 
 	case NBD_CLEAR_SOCK:
-		sock_shutdown(nbd);
-		nbd_clear_que(nbd);
-		BUG_ON(!list_empty(&nbd->queue_head));
-		BUG_ON(!list_empty(&nbd->waiting_queue));
-		kill_bdev(bdev);
+		if (nbd->disconnect) {
+			nbd_reset(nbd);
+		} else {
+			sock_shutdown(nbd);
+			nbd_clear_que(nbd);
+			BUG_ON(!list_empty(&nbd->queue_head));
+			BUG_ON(!list_empty(&nbd->waiting_queue));
+			kill_bdev(bdev);
+		}
 		return 0;
 
 	case NBD_SET_SOCK: {
@@ -812,8 +828,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		mutex_lock(&nbd->tx_lock);
 		nbd->task_recv = NULL;
 
-		if (nbd->disconnect) /* user requested, ignore socket errors */
+		if (nbd->disconnect) { /* user requested, ignore socket errors */
+			sock_shutdown(nbd);
 			error = 0;
+		}
 		if (nbd->timedout)
 			error = -ETIMEDOUT;
 
-- 
2.1.4

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 1/2] nbd: make nbd device wait for its users
  2016-06-24  9:29               ` [PATCH 1/2] nbd: " Markus Pargmann
  2016-06-24  9:29                 ` [PATCH 2/2] nbd: Disallow ioctls on disconnected block device Markus Pargmann
@ 2016-06-24  9:39                 ` Pranay Srivastava
  2016-06-24 13:40                 ` [Nbd] " Eric Blake
  2016-06-25 17:52                 ` Pranay Srivastava
  3 siblings, 0 replies; 48+ messages in thread
From: Pranay Srivastava @ 2016-06-24  9:39 UTC (permalink / raw)
  To: Markus Pargmann; +Cc: nbd-general, linux-kernel, Wouter Verhelst

Hey Markus,

On Fri, Jun 24, 2016 at 2:59 PM, Markus Pargmann <mpa@pengutronix.de> wrote:
> From: "Pranay Kr. Srivastava" <pranjas@gmail.com>
>
> When a timeout occurs or a recv fails, then instead of abruplty killing
> nbd block device wait for it's users to finish.
>
> This is more required when filesystem(s) like ext2 or ext3 don't expect
> their buffer heads to disappear while the filesystem is mounted.
>
> Each open is counted. The blockdevice is kept open until the last user
> closes the block device. This offers the possibility as well to open a
> new socket to be used while the filesystems are mounted.
>
> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
>
> [mpa: Keep the blockdevice open until all users left]
> Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
> ---
> Hi,
>
> I used your patch and changed it a bit based on my ideas. The general
> difference is that this keeps the block device open. After all users left, the
> device is reset.
>
> The followup patch then restricts access to ioctls after a disconnect. I wanted
> to avoid that anyone sets up anything new without all the old users leaving.
>
> Please let me know what you think about this.
>
> Best Regards,
>
> Markus
>
>  drivers/block/nbd.c | 62 ++++++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 45 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 1efc26bf1d21..620660f3ff0f 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -69,6 +69,8 @@ struct nbd_device {
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>         struct dentry *dbg_dir;
>  #endif
> +       atomic_t users; /* Users that opened the block device */
> +       struct block_device *bdev;
>  };
>
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
> @@ -655,9 +657,26 @@ static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
>         return ret;
>  }
>
> +static void nbd_bdev_reset(struct block_device *bdev)
> +{
> +       set_device_ro(bdev, false);
> +       bdev->bd_inode->i_size = 0;
> +       if (max_part > 0) {
> +               blkdev_reread_part(bdev);
> +               bdev->bd_invalidated = 1;
> +       }
> +}
> +
>  /* Reset all properties of an NBD device */
>  static void nbd_reset(struct nbd_device *nbd)
>  {
> +       sock_shutdown(nbd);
> +       nbd_clear_que(nbd);
> +       if (nbd->bdev) {
> +               kill_bdev(nbd->bdev);
> +               nbd_bdev_reset(nbd->bdev);
> +       }
> +
>         nbd->disconnect = false;
>         nbd->timedout = false;
>         nbd->blksize = 1024;
> @@ -669,16 +688,6 @@ static void nbd_reset(struct nbd_device *nbd)
>         del_timer_sync(&nbd->timeout_timer);
>  }
>
> -static void nbd_bdev_reset(struct block_device *bdev)
> -{
> -       set_device_ro(bdev, false);
> -       bdev->bd_inode->i_size = 0;
> -       if (max_part > 0) {
> -               blkdev_reread_part(bdev);
> -               bdev->bd_invalidated = 1;
> -       }
> -}
> -
>  static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
>  {
>         if (nbd->flags & NBD_FLAG_READ_ONLY)
> @@ -803,18 +812,11 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>                 mutex_lock(&nbd->tx_lock);
>                 nbd->task_recv = NULL;
>
> -               sock_shutdown(nbd);
> -               nbd_clear_que(nbd);
> -               kill_bdev(bdev);
> -               nbd_bdev_reset(bdev);
> -
>                 if (nbd->disconnect) /* user requested, ignore socket errors */
>                         error = 0;
>                 if (nbd->timedout)
>                         error = -ETIMEDOUT;
>
> -               nbd_reset(nbd);
> -
>                 return error;
>         }
>
> @@ -853,10 +855,35 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
>         return error;
>  }
>
> +static int nbd_open(struct block_device *bdev, fmode_t mode)
> +{
> +       struct nbd_device *nbd = bdev->bd_disk->private_data;
> +
> +       atomic_inc(&nbd->users);
> +
> +       if (!nbd->bdev)
> +               nbd->bdev = bdev;
> +
> +       return 0;
> +}
> +
> +static void nbd_release(struct gendisk *disk, fmode_t mode)
> +{
> +       struct nbd_device *nbd = disk->private_data;
> +
> +       if (atomic_dec_and_test(&nbd->users)) {
> +               mutex_lock(&nbd->tx_lock);
> +               nbd_reset(nbd);
> +               mutex_unlock(&nbd->tx_lock);
> +       }
> +}
> +
>  static const struct block_device_operations nbd_fops = {
>         .owner =        THIS_MODULE,
>         .ioctl =        nbd_ioctl,
>         .compat_ioctl = nbd_ioctl,
> +       .open =         nbd_open,
> +       .release =      nbd_release,
>  };
>
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
> @@ -1087,6 +1114,7 @@ static int __init nbd_init(void)
>                 disk->private_data = &nbd_dev[i];
>                 sprintf(disk->disk_name, "nbd%d", i);
>                 nbd_reset(&nbd_dev[i]);
> +               atomic_set(&nbd_dev[i].users, 0);
>                 add_disk(disk);
>         }
>
> --
> 2.1.4
>

I'll be sending out rebased patch today, by next hour or so. There are
modifications to it as well like you've suggested.

-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH v3 0/3] nbd: resolve bugs and limitations
  2016-06-14  8:52       ` Markus Pargmann
  2016-06-14  9:50         ` Pranay Srivastava
@ 2016-06-24 10:09         ` Pranay Kr. Srivastava
  2016-06-24 10:09           ` [PATCH v3 1/3]nbd: fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
                             ` (2 more replies)
  1 sibling, 3 replies; 48+ messages in thread
From: Pranay Kr. Srivastava @ 2016-06-24 10:09 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel, w; +Cc: Pranay Kr. Srivastava


This patch series fixes the following

1) fix might_sleep warning on socket shutdown:
   Fix sock_shutdown to avoid calling kernel_sock_shutdown
   while holding spin_lock.

2) cleanup nbd_set_socket
   Simple fixes to nbd_set_socket.

3) make nbd device wait for its users.
   When a timeout or error occurs then nbd driver simply kills
   the block device. Many filesystem(s) example ext2/ext3 don't
   expect their buffer heads to disappear like that. Fix this
   by making nbd device wait for its users.

   The same work function is used to trigger the kill_bdev as well
   do a sock_shutdown, depending on either a timeout/error occured
   or a disconnect was issued.

   Also avoid scheduling the work_fn in case a timeout for a request
   has already occured.


Pranay Kr. Srivastava (3):
  fix might_sleep warning on socket shutdown
  cleanup nbd_set_socket
  make nbd device wait for its users

 drivers/block/nbd.c | 169 +++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 127 insertions(+), 42 deletions(-)

Changelog:

Rebased all patches above on git://git.pengutronix.de/git/mpa/linux-nbd.git,
commit:7ed71a8704eda7b75fbd0ed73fd0a5b6e469d250

3) make nbd device wait for its users.
   Instead of using kref_sub just use kref_init and under the bd_mutex for
   serializing on open/close.
-- 
1.9.1

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH v3 1/3]nbd: fix might_sleep warning on socket shutdown
  2016-06-24 10:09         ` [PATCH v3 0/3] nbd: resolve bugs and limitations Pranay Kr. Srivastava
@ 2016-06-24 10:09           ` Pranay Kr. Srivastava
  2016-06-28  5:42             ` Pranay Srivastava
  2016-06-29  7:18             ` Markus Pargmann
  2016-06-24 10:09           ` [PATCH v3 2/3]nbd: cleanup nbd_set_socket Pranay Kr. Srivastava
  2016-06-24 10:09           ` [PATCH 3/3]nbd: make nbd device wait for its users Pranay Kr. Srivastava
  2 siblings, 2 replies; 48+ messages in thread
From: Pranay Kr. Srivastava @ 2016-06-24 10:09 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel, w; +Cc: Pranay Kr. Srivastava

spinlocked ranges should be small and not contain calls into huge
subfunctions. Fix my mistake and just get the pointer to the socket
instead of doing everything with spinlock held.

Reported-by: Mikulas Patocka <mikulas@twibright.com>
Signed-off-by: Markus Pargmann <mpa@pengutronix.de>

Changelog:
Pranay Kr. Srivastava<pranjas@gmail.com>:

1) Use spin_lock instead of irq version for sock_shutdown.

2) Use system work queue to actually trigger the shutdown of
   socket. This solves the issue when kernel_sendmsg is currently
   blocked while a timeout occurs.

Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
---
 drivers/block/nbd.c | 69 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 44 insertions(+), 25 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 56f7f5d..586d946 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -39,6 +39,7 @@
 #include <asm/types.h>
 
 #include <linux/nbd.h>
+#include <linux/workqueue.h>
 
 struct nbd_device {
 	u32 flags;
@@ -69,6 +70,10 @@ struct nbd_device {
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	struct dentry *dbg_dir;
 #endif
+	/*
+	*This is specifically for calling sock_shutdown, for now.
+	*/
+	struct work_struct ws_shutdown;
 };
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -95,6 +100,11 @@ static int max_part;
  */
 static DEFINE_SPINLOCK(nbd_lock);
 
+/*
+ * Shutdown function for nbd_dev work struct.
+ */
+static void nbd_ws_func_shutdown(struct work_struct *);
+
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
 	return disk_to_dev(nbd->disk);
@@ -172,39 +182,35 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
  */
 static void sock_shutdown(struct nbd_device *nbd)
 {
-	spin_lock_irq(&nbd->sock_lock);
-
-	if (!nbd->sock) {
-		spin_unlock_irq(&nbd->sock_lock);
-		return;
-	}
+	struct socket *sock;
 
-	dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
-	kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
-	sockfd_put(nbd->sock);
+	spin_lock(&nbd->sock_lock);
+	sock = nbd->sock;
 	nbd->sock = NULL;
-	spin_unlock_irq(&nbd->sock_lock);
+	spin_unlock(&nbd->sock_lock);
+
+	if (!sock)
+		return;
 
 	del_timer(&nbd->timeout_timer);
+	dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
+	kernel_sock_shutdown(sock, SHUT_RDWR);
+	sockfd_put(sock);
 }
 
 static void nbd_xmit_timeout(unsigned long arg)
 {
 	struct nbd_device *nbd = (struct nbd_device *)arg;
-	unsigned long flags;
 
 	if (list_empty(&nbd->queue_head))
 		return;
-
-	spin_lock_irqsave(&nbd->sock_lock, flags);
-
 	nbd->timedout = true;
-
-	if (nbd->sock)
-		kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
-
-	spin_unlock_irqrestore(&nbd->sock_lock, flags);
-
+	schedule_work(&nbd->ws_shutdown);
+	/*
+	 * Make sure sender thread sees nbd->timedout.
+	 */
+	smp_wmb();
+	wake_up(&nbd->waiting_wq);
 	dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
 }
 
@@ -574,8 +580,8 @@ static int nbd_thread_send(void *data)
 	while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
 		/* wait for something to do */
 		wait_event_interruptible(nbd->waiting_wq,
-					 kthread_should_stop() ||
-					 !list_empty(&nbd->waiting_queue));
+				kthread_should_stop() ||
+				!list_empty(&nbd->waiting_queue));
 
 		/* extract request */
 		if (list_empty(&nbd->waiting_queue))
@@ -583,12 +589,16 @@ static int nbd_thread_send(void *data)
 
 		spin_lock_irq(&nbd->queue_lock);
 		req = list_entry(nbd->waiting_queue.next, struct request,
-				 queuelist);
+				queuelist);
 		list_del_init(&req->queuelist);
 		spin_unlock_irq(&nbd->queue_lock);
 
-		/* handle request */
 		nbd_handle_req(nbd, req);
+		if (nbd->timedout) {
+			req->errors++;
+			nbd_end_request(nbd, req);
+		} else
+			nbd_handle_req(nbd, req);
 	}
 
 	nbd->task_send = NULL;
@@ -668,6 +678,7 @@ static void nbd_reset(struct nbd_device *nbd)
 	set_capacity(nbd->disk, 0);
 	nbd->flags = 0;
 	nbd->xmit_timeout = 0;
+	INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
 	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 	del_timer_sync(&nbd->timeout_timer);
 }
@@ -802,11 +813,11 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		error = nbd_thread_recv(nbd, bdev);
 		nbd_dev_dbg_close(nbd);
 		kthread_stop(thread);
+		sock_shutdown(nbd);
 
 		mutex_lock(&nbd->tx_lock);
 		nbd->task_recv = NULL;
 
-		sock_shutdown(nbd);
 		nbd_clear_que(nbd);
 		kill_bdev(bdev);
 		nbd_bdev_reset(bdev);
@@ -862,6 +873,14 @@ static const struct block_device_operations nbd_fops = {
 	.compat_ioctl =	nbd_ioctl,
 };
 
+static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
+{
+	struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
+			ws_shutdown);
+
+	sock_shutdown(nbd_dev);
+}
+
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 
 static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
-- 
1.9.1

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH v3 2/3]nbd: cleanup nbd_set_socket
  2016-06-24 10:09         ` [PATCH v3 0/3] nbd: resolve bugs and limitations Pranay Kr. Srivastava
  2016-06-24 10:09           ` [PATCH v3 1/3]nbd: fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
@ 2016-06-24 10:09           ` Pranay Kr. Srivastava
  2016-06-24 10:09           ` [PATCH 3/3]nbd: make nbd device wait for its users Pranay Kr. Srivastava
  2 siblings, 0 replies; 48+ messages in thread
From: Pranay Kr. Srivastava @ 2016-06-24 10:09 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel, w; +Cc: Pranay Kr. Srivastava

This patch
1) uses spin_lock instead of irq version.

2) removes the goto statement in case a socket
   is already assigned with simple if-else statement.

Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
---
 drivers/block/nbd.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 586d946..9223b09 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -653,17 +653,12 @@ static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
 {
 	int ret = 0;
 
-	spin_lock_irq(&nbd->sock_lock);
-
-	if (nbd->sock) {
+	spin_lock(&nbd->sock_lock);
+	if (nbd->sock)
 		ret = -EBUSY;
-		goto out;
-	}
-
-	nbd->sock = sock;
-
-out:
-	spin_unlock_irq(&nbd->sock_lock);
+	else
+		nbd->sock = sock;
+	spin_unlock(&nbd->sock_lock);
 
 	return ret;
 }
-- 
1.9.1

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 3/3]nbd: make nbd device wait for its users
  2016-06-24 10:09         ` [PATCH v3 0/3] nbd: resolve bugs and limitations Pranay Kr. Srivastava
  2016-06-24 10:09           ` [PATCH v3 1/3]nbd: fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
  2016-06-24 10:09           ` [PATCH v3 2/3]nbd: cleanup nbd_set_socket Pranay Kr. Srivastava
@ 2016-06-24 10:09           ` Pranay Kr. Srivastava
  2016-06-24 13:42             ` [Nbd] " Eric Blake
                               ` (2 more replies)
  2 siblings, 3 replies; 48+ messages in thread
From: Pranay Kr. Srivastava @ 2016-06-24 10:09 UTC (permalink / raw)
  To: mpa, nbd-general, linux-kernel, w; +Cc: Pranay Kr. Srivastava

When a timeout occurs or a recv fails, then
instead of abruplty killing nbd block device
wait for it's users to finish.

This is more required when filesystem(s) like
ext2 or ext3 don't expect their buffer heads to
disappear while the filesystem is mounted.

Each open of a nbd device is refcounted, while
the userland program [nbd-client] doing the
NBD_DO_IT ioctl would now wait for any other users
of this device before invalidating the nbd device.

A timedout or a disconnected device, if in use, can't
be used until it has been resetted. The resetting happens
when all tasks having this bdev open closes this bdev.

Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
---
 drivers/block/nbd.c | 124 ++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 96 insertions(+), 28 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 9223b09..0587bbd 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -70,10 +70,13 @@ struct nbd_device {
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	struct dentry *dbg_dir;
 #endif
+
 	/*
-	*This is specifically for calling sock_shutdown, for now.
-	*/
+	 *This is specifically for calling sock_shutdown, for now.
+	 */
 	struct work_struct ws_shutdown;
+	struct kref users;
+	struct completion user_completion;
 };
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -104,6 +107,8 @@ static DEFINE_SPINLOCK(nbd_lock);
  * Shutdown function for nbd_dev work struct.
  */
 static void nbd_ws_func_shutdown(struct work_struct *);
+static void nbd_kref_release(struct kref *);
+static int nbd_size_clear(struct nbd_device *, struct block_device *);
 
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
@@ -129,7 +134,7 @@ static const char *nbdcmd_to_ascii(int cmd)
 
 static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
 {
-	bdev->bd_inode->i_size = 0;
+	i_size_write(bdev->bd_inode, 0);
 	set_capacity(nbd->disk, 0);
 	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
 
@@ -141,7 +146,7 @@ static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
 	if (!nbd_is_connected(nbd))
 		return;
 
-	bdev->bd_inode->i_size = nbd->bytesize;
+	i_size_write(bdev->bd_inode, nbd->bytesize);
 	set_capacity(nbd->disk, nbd->bytesize >> 9);
 	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
 }
@@ -150,11 +155,9 @@ static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
 			int blocksize, int nr_blocks)
 {
 	int ret;
-
 	ret = set_blocksize(bdev, blocksize);
 	if (ret)
 		return ret;
-
 	nbd->blksize = blocksize;
 	nbd->bytesize = (loff_t)blocksize * (loff_t)nr_blocks;
 
@@ -202,14 +205,19 @@ static void nbd_xmit_timeout(unsigned long arg)
 {
 	struct nbd_device *nbd = (struct nbd_device *)arg;
 
+	if (nbd->timedout)
+		return;
+
 	if (list_empty(&nbd->queue_head))
 		return;
+
 	nbd->timedout = true;
-	schedule_work(&nbd->ws_shutdown);
+
 	/*
 	 * Make sure sender thread sees nbd->timedout.
 	 */
 	smp_wmb();
+	schedule_work(&nbd->ws_shutdown);
 	wake_up(&nbd->waiting_wq);
 	dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
 }
@@ -476,8 +484,6 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
 		nbd_end_request(nbd, req);
 	}
 
-	nbd_size_clear(nbd, bdev);
-
 	device_remove_file(disk_to_dev(nbd->disk), &dev_attr_pid);
 
 	nbd->task_recv = NULL;
@@ -580,8 +586,8 @@ static int nbd_thread_send(void *data)
 	while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
 		/* wait for something to do */
 		wait_event_interruptible(nbd->waiting_wq,
-				kthread_should_stop() ||
-				!list_empty(&nbd->waiting_queue));
+					 kthread_should_stop() ||
+					 !list_empty(&nbd->waiting_queue));
 
 		/* extract request */
 		if (list_empty(&nbd->waiting_queue))
@@ -589,11 +595,11 @@ static int nbd_thread_send(void *data)
 
 		spin_lock_irq(&nbd->queue_lock);
 		req = list_entry(nbd->waiting_queue.next, struct request,
-				queuelist);
+				 queuelist);
 		list_del_init(&req->queuelist);
 		spin_unlock_irq(&nbd->queue_lock);
 
-		nbd_handle_req(nbd, req);
+		/* handle request */
 		if (nbd->timedout) {
 			req->errors++;
 			nbd_end_request(nbd, req);
@@ -654,12 +660,13 @@ static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
 	int ret = 0;
 
 	spin_lock(&nbd->sock_lock);
-	if (nbd->sock)
+
+	if (nbd->sock || nbd->timedout)
 		ret = -EBUSY;
 	else
 		nbd->sock = sock;
-	spin_unlock(&nbd->sock_lock);
 
+	spin_unlock(&nbd->sock_lock);
 	return ret;
 }
 
@@ -674,6 +681,7 @@ static void nbd_reset(struct nbd_device *nbd)
 	nbd->flags = 0;
 	nbd->xmit_timeout = 0;
 	INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
+	init_completion(&nbd->user_completion);
 	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 	del_timer_sync(&nbd->timeout_timer);
 }
@@ -708,6 +716,9 @@ static void nbd_dev_dbg_close(struct nbd_device *nbd);
 static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		       unsigned int cmd, unsigned long arg)
 {
+	if (nbd->timedout || nbd->disconnect)
+		return -EBUSY;
+
 	switch (cmd) {
 	case NBD_DISCONNECT: {
 		struct request sreq;
@@ -737,7 +748,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		nbd_clear_que(nbd);
 		BUG_ON(!list_empty(&nbd->queue_head));
 		BUG_ON(!list_empty(&nbd->waiting_queue));
-		kill_bdev(bdev);
 		return 0;
 
 	case NBD_SET_SOCK: {
@@ -756,7 +766,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 
 	case NBD_SET_BLKSIZE: {
 		loff_t bsize = div_s64(nbd->bytesize, arg);
-
 		return nbd_size_set(nbd, bdev, arg, bsize);
 	}
 
@@ -808,22 +817,29 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		error = nbd_thread_recv(nbd, bdev);
 		nbd_dev_dbg_close(nbd);
 		kthread_stop(thread);
-		sock_shutdown(nbd);
 
-		mutex_lock(&nbd->tx_lock);
-		nbd->task_recv = NULL;
-
-		nbd_clear_que(nbd);
-		kill_bdev(bdev);
-		nbd_bdev_reset(bdev);
+		sock_shutdown(nbd);
 
 		if (nbd->disconnect) /* user requested, ignore socket errors */
 			error = 0;
 		if (nbd->timedout)
 			error = -ETIMEDOUT;
 
-		nbd_reset(nbd);
+		mutex_lock(&nbd->tx_lock);
+		nbd_clear_que(nbd);
+		nbd->disconnect = true; /* To kill bdev*/
+		mutex_unlock(&nbd->tx_lock);
+		cancel_work_sync(&nbd->ws_shutdown);
+		kref_put(&nbd->users, nbd_kref_release);
+		wait_for_completion(&nbd->user_completion);
 
+		mutex_lock(&bdev->bd_mutex);
+		if (!kref_get_unless_zero(&nbd->users))
+			kref_init(&nbd->users);
+		mutex_unlock(&bdev->bd_mutex);
+
+		mutex_lock(&nbd->tx_lock);
+		nbd_reset(nbd);
 		return error;
 	}
 
@@ -861,19 +877,71 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
 
 	return error;
 }
+static void nbd_kref_release(struct kref *kref_users)
+{
+	struct nbd_device *nbd = container_of(kref_users, struct nbd_device,
+			users);
+	schedule_work(&nbd->ws_shutdown);
+}
+
+static int nbd_open(struct block_device *bdev, fmode_t mode)
+{
+	struct nbd_device *nbd_dev = bdev->bd_disk->private_data;
+
+	if (!kref_get_unless_zero(&nbd_dev->users))
+		kref_init(&nbd_dev->users);
+
+	pr_debug("Opening nbd_dev %s. Active users = %u\n",
+			bdev->bd_disk->disk_name,
+			atomic_read(&nbd_dev->users.refcount)
+		);
+	return 0;
+}
+
+static void nbd_release(struct gendisk *disk, fmode_t mode)
+{
+	struct nbd_device *nbd_dev = disk->private_data;
+
+	kref_put(&nbd_dev->users,  nbd_kref_release);
+
+	pr_debug("Closing nbd_dev %s. Active users = %u\n",
+			disk->disk_name,
+			atomic_read(&nbd_dev->users.refcount)
+		);
+}
 
 static const struct block_device_operations nbd_fops = {
 	.owner =	THIS_MODULE,
 	.ioctl =	nbd_ioctl,
 	.compat_ioctl =	nbd_ioctl,
+	.open =		nbd_open,
+	.release =	nbd_release
 };
 
+
 static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
 {
 	struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
-			ws_shutdown);
-
-	sock_shutdown(nbd_dev);
+							ws_shutdown);
+	struct block_device *bdev = bdget(part_devt(
+						dev_to_part(nbd_to_dev(nbd_dev))
+						)
+					);
+	BUG_ON(!bdev);
+	if (nbd_dev->timedout)
+		sock_shutdown(nbd_dev);
+
+	if (nbd_dev->disconnect) {
+		mutex_lock(&nbd_dev->tx_lock);
+		nbd_dev->task_recv = NULL;
+		nbd_clear_que(nbd_dev);
+		kill_bdev(bdev);
+		nbd_bdev_reset(bdev);
+		mutex_unlock(&nbd_dev->tx_lock);
+		nbd_size_clear(nbd_dev, bdev);
+		complete(&nbd_dev->user_completion);
+	}
+	bdput(bdev);
 }
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
-- 
1.9.1

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Nbd] [PATCH 1/2] nbd: make nbd device wait for its users
  2016-06-24  9:29               ` [PATCH 1/2] nbd: " Markus Pargmann
  2016-06-24  9:29                 ` [PATCH 2/2] nbd: Disallow ioctls on disconnected block device Markus Pargmann
  2016-06-24  9:39                 ` [PATCH 1/2] nbd: make nbd device wait for its users Pranay Srivastava
@ 2016-06-24 13:40                 ` Eric Blake
  2016-06-25 17:52                 ` Pranay Srivastava
  3 siblings, 0 replies; 48+ messages in thread
From: Eric Blake @ 2016-06-24 13:40 UTC (permalink / raw)
  To: Markus Pargmann, Pranay Kr. Srivastava
  Cc: nbd-general, Wouter Verhelst, linux-kernel


[-- Attachment #1.1: Type: text/plain, Size: 1003 bytes --]

On 06/24/2016 03:29 AM, Markus Pargmann wrote:
> From: "Pranay Kr. Srivastava" <pranjas@gmail.com>
> 
> When a timeout occurs or a recv fails, then instead of abruplty killing

s/abruplty/abruptly/

> nbd block device wait for it's users to finish.

s/it's/its/ (remember, "it's" is only usable where "it is" would also be
appropriate)

> 
> This is more required when filesystem(s) like ext2 or ext3 don't expect
> their buffer heads to disappear while the filesystem is mounted.
> 
> Each open is counted. The blockdevice is kept open until the last user
> closes the block device. This offers the possibility as well to open a
> new socket to be used while the filesystems are mounted.
> 
> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
> 
> [mpa: Keep the blockdevice open until all users left]
> Signed-off-by: Markus Pargmann <mpa@pengutronix.de>



-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Nbd] [PATCH 3/3]nbd: make nbd device wait for its users
  2016-06-24 10:09           ` [PATCH 3/3]nbd: make nbd device wait for its users Pranay Kr. Srivastava
@ 2016-06-24 13:42             ` Eric Blake
  2016-06-25 17:56               ` Pranay Srivastava
  2016-06-25 18:01             ` Pranay Srivastava
  2016-06-29  7:06             ` Markus Pargmann
  2 siblings, 1 reply; 48+ messages in thread
From: Eric Blake @ 2016-06-24 13:42 UTC (permalink / raw)
  To: Pranay Kr. Srivastava, mpa, nbd-general, linux-kernel, w


[-- Attachment #1.1: Type: text/plain, Size: 1113 bytes --]

On 06/24/2016 04:09 AM, Pranay Kr. Srivastava wrote:
> When a timeout occurs or a recv fails, then
> instead of abruplty killing nbd block device

s/abruplty/abruptly/

> wait for it's users to finish.

s/it's/its/

> 
> This is more required when filesystem(s) like
> ext2 or ext3 don't expect their buffer heads to
> disappear while the filesystem is mounted.
> 
> Each open of a nbd device is refcounted, while
> the userland program [nbd-client] doing the
> NBD_DO_IT ioctl would now wait for any other users
> of this device before invalidating the nbd device.
> 
> A timedout or a disconnected device, if in use, can't
> be used until it has been resetted. The resetting happens

s/resetted/reset/

> when all tasks having this bdev open closes this bdev.
> 
> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
> ---
>  drivers/block/nbd.c | 124 ++++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 96 insertions(+), 28 deletions(-)
> 


-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 1/2] nbd: make nbd device wait for its users
  2016-06-24  9:29               ` [PATCH 1/2] nbd: " Markus Pargmann
                                   ` (2 preceding siblings ...)
  2016-06-24 13:40                 ` [Nbd] " Eric Blake
@ 2016-06-25 17:52                 ` Pranay Srivastava
  2016-06-29  6:57                   ` Markus Pargmann
  3 siblings, 1 reply; 48+ messages in thread
From: Pranay Srivastava @ 2016-06-25 17:52 UTC (permalink / raw)
  To: Markus Pargmann; +Cc: nbd-general, linux-kernel, Wouter Verhelst

On Fri, Jun 24, 2016 at 2:59 PM, Markus Pargmann <mpa@pengutronix.de> wrote:
> From: "Pranay Kr. Srivastava" <pranjas@gmail.com>
>
> When a timeout occurs or a recv fails, then instead of abruplty killing
> nbd block device wait for it's users to finish.
>
> This is more required when filesystem(s) like ext2 or ext3 don't expect
> their buffer heads to disappear while the filesystem is mounted.
>
> Each open is counted. The blockdevice is kept open until the last user
> closes the block device. This offers the possibility as well to open a
> new socket to be used while the filesystems are mounted.
>
> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
>
> [mpa: Keep the blockdevice open until all users left]
> Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
> ---
> Hi,
>
> I used your patch and changed it a bit based on my ideas. The general
> difference is that this keeps the block device open. After all users left, the
> device is reset.
>
> The followup patch then restricts access to ioctls after a disconnect. I wanted
> to avoid that anyone sets up anything new without all the old users leaving.
>
> Please let me know what you think about this.
>
> Best Regards,
>
> Markus
>
>  drivers/block/nbd.c | 62 ++++++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 45 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 1efc26bf1d21..620660f3ff0f 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -69,6 +69,8 @@ struct nbd_device {
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>         struct dentry *dbg_dir;
>  #endif
> +       atomic_t users; /* Users that opened the block device */

We don't need to put bdev in nbd struct. We can get it via gendisk and
bdget/bdput.
> +       struct block_device *bdev;
>  };
>
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
> @@ -655,9 +657,26 @@ static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
>         return ret;
>  }
>
> +static void nbd_bdev_reset(struct block_device *bdev)
> +{
> +       set_device_ro(bdev, false);
> +       bdev->bd_inode->i_size = 0;

i_size_write should be better I guess.

> +       if (max_part > 0) {
> +               blkdev_reread_part(bdev);
> +               bdev->bd_invalidated = 1;
> +       }
> +}
> +
>  /* Reset all properties of an NBD device */
>  static void nbd_reset(struct nbd_device *nbd)
>  {
> +       sock_shutdown(nbd);
> +       nbd_clear_que(nbd);
> +       if (nbd->bdev) {
> +               kill_bdev(nbd->bdev);
> +               nbd_bdev_reset(nbd->bdev);
> +       }
> +
>         nbd->disconnect = false;
>         nbd->timedout = false;
>         nbd->blksize = 1024;

I actually forgot to ask about this blksize. We do set_blocksize call
for bdev, but
shouldn't we instead do blkdev_logicial_blocksize?

Mount would set the blocksize depending on the filesystem's block size and the
block device logical block size supported. Should we just get rid of this?

> @@ -669,16 +688,6 @@ static void nbd_reset(struct nbd_device *nbd)
>         del_timer_sync(&nbd->timeout_timer);
>  }
>
> -static void nbd_bdev_reset(struct block_device *bdev)
> -{
> -       set_device_ro(bdev, false);
> -       bdev->bd_inode->i_size = 0;
> -       if (max_part > 0) {
> -               blkdev_reread_part(bdev);
> -               bdev->bd_invalidated = 1;
> -       }
> -}
> -
>  static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
>  {
>         if (nbd->flags & NBD_FLAG_READ_ONLY)
> @@ -803,18 +812,11 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,

How about disallowing any ioctl until the nbd_device has been reset?
Perhaps, throw error
in open instead of checking here?

>                 mutex_lock(&nbd->tx_lock);
>                 nbd->task_recv = NULL;
>
> -               sock_shutdown(nbd);
> -               nbd_clear_que(nbd);
> -               kill_bdev(bdev);
> -               nbd_bdev_reset(bdev);
> -
>                 if (nbd->disconnect) /* user requested, ignore socket errors */
>                         error = 0;
>                 if (nbd->timedout)
>                         error = -ETIMEDOUT;
>

Doesn't this change the semantics for user space? NBD_DO_IT is
supposed to be over either on error or a
user disconnect not before that right[?].

> -               nbd_reset(nbd);
> -
>                 return error;
>         }
>
> @@ -853,10 +855,35 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
>         return error;
>  }
>
> +static int nbd_open(struct block_device *bdev, fmode_t mode)
> +{
> +       struct nbd_device *nbd = bdev->bd_disk->private_data;
> +
> +       atomic_inc(&nbd->users);
> +
> +       if (!nbd->bdev)
> +               nbd->bdev = bdev;
> +
> +       return 0;
> +}
> +
> +static void nbd_release(struct gendisk *disk, fmode_t mode)
> +{
> +       struct nbd_device *nbd = disk->private_data;
> +
> +       if (atomic_dec_and_test(&nbd->users)) {
> +               mutex_lock(&nbd->tx_lock);
> +               nbd_reset(nbd);
> +               mutex_unlock(&nbd->tx_lock);
> +       }
> +}
> +
What if the following happens,

1) nbd-client [nbd-c1] attaches the block device

2) mount this nbd device

3) somebody goes crazy and does nbd-client -d <nbd_dev>

Step 3) would cause a DISCONNECT and a CLEAR_SOCK ioctl to be fired,
but in CLEAR_SOCK there's a call to kill_bdev, so I guess that needs
to go as well[?].

>  static const struct block_device_operations nbd_fops = {
>         .owner =        THIS_MODULE,
>         .ioctl =        nbd_ioctl,
>         .compat_ioctl = nbd_ioctl,
> +       .open =         nbd_open,
> +       .release =      nbd_release,
>  };
>
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
> @@ -1087,6 +1114,7 @@ static int __init nbd_init(void)
>                 disk->private_data = &nbd_dev[i];
>                 sprintf(disk->disk_name, "nbd%d", i);
>                 nbd_reset(&nbd_dev[i]);
> +               atomic_set(&nbd_dev[i].users, 0);
>                 add_disk(disk);
>         }
>
> --
> 2.1.4
>

-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Nbd] [PATCH 3/3]nbd: make nbd device wait for its users
  2016-06-24 13:42             ` [Nbd] " Eric Blake
@ 2016-06-25 17:56               ` Pranay Srivastava
  0 siblings, 0 replies; 48+ messages in thread
From: Pranay Srivastava @ 2016-06-25 17:56 UTC (permalink / raw)
  To: Eric Blake; +Cc: Markus Pargmann, nbd-general, linux-kernel, Wouter Verhelst

Hi Eric,

On Fri, Jun 24, 2016 at 7:12 PM, Eric Blake <eblake@redhat.com> wrote:
> On 06/24/2016 04:09 AM, Pranay Kr. Srivastava wrote:
>> When a timeout occurs or a recv fails, then
>> instead of abruplty killing nbd block device
>
> s/abruplty/abruptly/
>
>> wait for it's users to finish.
>
> s/it's/its/
>
>>
>> This is more required when filesystem(s) like
>> ext2 or ext3 don't expect their buffer heads to
>> disappear while the filesystem is mounted.
>>
>> Each open of a nbd device is refcounted, while
>> the userland program [nbd-client] doing the
>> NBD_DO_IT ioctl would now wait for any other users
>> of this device before invalidating the nbd device.
>>
>> A timedout or a disconnected device, if in use, can't
>> be used until it has been resetted. The resetting happens
>
> s/resetted/reset/
>

Thanks for going through the patch. Can I get some review on the
code as well so I can fix and resend that too, along with fixes of grammatical
errors.

>> when all tasks having this bdev open closes this bdev.
>>
>> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
>> ---
>>  drivers/block/nbd.c | 124 ++++++++++++++++++++++++++++++++++++++++------------
>>  1 file changed, 96 insertions(+), 28 deletions(-)
>>
>
>
> --
> Eric Blake   eblake redhat com    +1-919-301-3266
> Libvirt virtualization library http://libvirt.org
>



-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 3/3]nbd: make nbd device wait for its users
  2016-06-24 10:09           ` [PATCH 3/3]nbd: make nbd device wait for its users Pranay Kr. Srivastava
  2016-06-24 13:42             ` [Nbd] " Eric Blake
@ 2016-06-25 18:01             ` Pranay Srivastava
  2016-06-29  7:06             ` Markus Pargmann
  2 siblings, 0 replies; 48+ messages in thread
From: Pranay Srivastava @ 2016-06-25 18:01 UTC (permalink / raw)
  To: Markus Pargmann, nbd-general, linux-kernel, Wouter Verhelst
  Cc: Pranay Kr. Srivastava

Hi

On Fri, Jun 24, 2016 at 3:39 PM, Pranay Kr. Srivastava
<pranjas@gmail.com> wrote:
> When a timeout occurs or a recv fails, then
> instead of abruplty killing nbd block device
> wait for it's users to finish.
>
> This is more required when filesystem(s) like
> ext2 or ext3 don't expect their buffer heads to
> disappear while the filesystem is mounted.
>
> Each open of a nbd device is refcounted, while
> the userland program [nbd-client] doing the
> NBD_DO_IT ioctl would now wait for any other users
> of this device before invalidating the nbd device.
>
> A timedout or a disconnected device, if in use, can't
> be used until it has been resetted. The resetting happens
> when all tasks having this bdev open closes this bdev.
>
> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
> ---
>  drivers/block/nbd.c | 124 ++++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 96 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 9223b09..0587bbd 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -70,10 +70,13 @@ struct nbd_device {
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>         struct dentry *dbg_dir;
>  #endif
> +
>         /*
> -       *This is specifically for calling sock_shutdown, for now.
> -       */
> +        *This is specifically for calling sock_shutdown, for now.
> +        */
>         struct work_struct ws_shutdown;
> +       struct kref users;
> +       struct completion user_completion;
>  };
>
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
> @@ -104,6 +107,8 @@ static DEFINE_SPINLOCK(nbd_lock);
>   * Shutdown function for nbd_dev work struct.
>   */
>  static void nbd_ws_func_shutdown(struct work_struct *);
> +static void nbd_kref_release(struct kref *);
> +static int nbd_size_clear(struct nbd_device *, struct block_device *);
>
>  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
>  {
> @@ -129,7 +134,7 @@ static const char *nbdcmd_to_ascii(int cmd)
>
>  static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
>  {
> -       bdev->bd_inode->i_size = 0;
> +       i_size_write(bdev->bd_inode, 0);
>         set_capacity(nbd->disk, 0);
>         kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
>
> @@ -141,7 +146,7 @@ static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
>         if (!nbd_is_connected(nbd))
>                 return;
>
> -       bdev->bd_inode->i_size = nbd->bytesize;
> +       i_size_write(bdev->bd_inode, nbd->bytesize);
>         set_capacity(nbd->disk, nbd->bytesize >> 9);
>         kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
>  }
> @@ -150,11 +155,9 @@ static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
>                         int blocksize, int nr_blocks)
>  {
>         int ret;
> -
>         ret = set_blocksize(bdev, blocksize);
>         if (ret)
>                 return ret;
> -
>         nbd->blksize = blocksize;
>         nbd->bytesize = (loff_t)blocksize * (loff_t)nr_blocks;
>
> @@ -202,14 +205,19 @@ static void nbd_xmit_timeout(unsigned long arg)
>  {
>         struct nbd_device *nbd = (struct nbd_device *)arg;
>
> +       if (nbd->timedout)
> +               return;
> +
>         if (list_empty(&nbd->queue_head))
>                 return;
> +
>         nbd->timedout = true;
> -       schedule_work(&nbd->ws_shutdown);
> +
>         /*
>          * Make sure sender thread sees nbd->timedout.
>          */
>         smp_wmb();
> +       schedule_work(&nbd->ws_shutdown);
>         wake_up(&nbd->waiting_wq);
>         dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
>  }
> @@ -476,8 +484,6 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
>                 nbd_end_request(nbd, req);
>         }
>
> -       nbd_size_clear(nbd, bdev);
> -
>         device_remove_file(disk_to_dev(nbd->disk), &dev_attr_pid);
>
>         nbd->task_recv = NULL;
> @@ -580,8 +586,8 @@ static int nbd_thread_send(void *data)
>         while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
>                 /* wait for something to do */
>                 wait_event_interruptible(nbd->waiting_wq,
> -                               kthread_should_stop() ||
> -                               !list_empty(&nbd->waiting_queue));
> +                                        kthread_should_stop() ||
> +                                        !list_empty(&nbd->waiting_queue));
>
>                 /* extract request */
>                 if (list_empty(&nbd->waiting_queue))
> @@ -589,11 +595,11 @@ static int nbd_thread_send(void *data)
>
>                 spin_lock_irq(&nbd->queue_lock);
>                 req = list_entry(nbd->waiting_queue.next, struct request,
> -                               queuelist);
> +                                queuelist);
>                 list_del_init(&req->queuelist);
>                 spin_unlock_irq(&nbd->queue_lock);
>
> -               nbd_handle_req(nbd, req);
> +               /* handle request */
>                 if (nbd->timedout) {
>                         req->errors++;
>                         nbd_end_request(nbd, req);
> @@ -654,12 +660,13 @@ static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
>         int ret = 0;
>
>         spin_lock(&nbd->sock_lock);
> -       if (nbd->sock)
> +
> +       if (nbd->sock || nbd->timedout)
>                 ret = -EBUSY;
>         else
>                 nbd->sock = sock;
> -       spin_unlock(&nbd->sock_lock);
>
> +       spin_unlock(&nbd->sock_lock);
>         return ret;
>  }
>
> @@ -674,6 +681,7 @@ static void nbd_reset(struct nbd_device *nbd)
>         nbd->flags = 0;
>         nbd->xmit_timeout = 0;
>         INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
> +       init_completion(&nbd->user_completion);
>         queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
>         del_timer_sync(&nbd->timeout_timer);
>  }
> @@ -708,6 +716,9 @@ static void nbd_dev_dbg_close(struct nbd_device *nbd);
>  static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>                        unsigned int cmd, unsigned long arg)
>  {
> +       if (nbd->timedout || nbd->disconnect)
> +               return -EBUSY;
> +
>         switch (cmd) {
>         case NBD_DISCONNECT: {
>                 struct request sreq;
> @@ -737,7 +748,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>                 nbd_clear_que(nbd);
>                 BUG_ON(!list_empty(&nbd->queue_head));
>                 BUG_ON(!list_empty(&nbd->waiting_queue));
> -               kill_bdev(bdev);
>                 return 0;
>
>         case NBD_SET_SOCK: {
> @@ -756,7 +766,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>
>         case NBD_SET_BLKSIZE: {
>                 loff_t bsize = div_s64(nbd->bytesize, arg);
> -
>                 return nbd_size_set(nbd, bdev, arg, bsize);
>         }
>
> @@ -808,22 +817,29 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>                 error = nbd_thread_recv(nbd, bdev);
>                 nbd_dev_dbg_close(nbd);
>                 kthread_stop(thread);
> -               sock_shutdown(nbd);
>
> -               mutex_lock(&nbd->tx_lock);
> -               nbd->task_recv = NULL;
> -
> -               nbd_clear_que(nbd);
> -               kill_bdev(bdev);
> -               nbd_bdev_reset(bdev);
> +               sock_shutdown(nbd);
>
>                 if (nbd->disconnect) /* user requested, ignore socket errors */
>                         error = 0;
>                 if (nbd->timedout)
>                         error = -ETIMEDOUT;
>
> -               nbd_reset(nbd);
> +               mutex_lock(&nbd->tx_lock);
> +               nbd_clear_que(nbd);
> +               nbd->disconnect = true; /* To kill bdev*/
> +               mutex_unlock(&nbd->tx_lock);
> +               cancel_work_sync(&nbd->ws_shutdown);
> +               kref_put(&nbd->users, nbd_kref_release);
> +               wait_for_completion(&nbd->user_completion);
>
> +               mutex_lock(&bdev->bd_mutex);
> +               if (!kref_get_unless_zero(&nbd->users))
> +                       kref_init(&nbd->users);
> +               mutex_unlock(&bdev->bd_mutex);
> +
> +               mutex_lock(&nbd->tx_lock);
> +               nbd_reset(nbd);
>                 return error;
>         }
>
> @@ -861,19 +877,71 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
>
>         return error;
>  }
> +static void nbd_kref_release(struct kref *kref_users)
> +{
> +       struct nbd_device *nbd = container_of(kref_users, struct nbd_device,
> +                       users);
> +       schedule_work(&nbd->ws_shutdown);
> +}
> +
> +static int nbd_open(struct block_device *bdev, fmode_t mode)
> +{
> +       struct nbd_device *nbd_dev = bdev->bd_disk->private_data;
> +
> +       if (!kref_get_unless_zero(&nbd_dev->users))
> +               kref_init(&nbd_dev->users);
> +
> +       pr_debug("Opening nbd_dev %s. Active users = %u\n",
> +                       bdev->bd_disk->disk_name,
> +                       atomic_read(&nbd_dev->users.refcount)
> +               );
> +       return 0;
> +}
> +
> +static void nbd_release(struct gendisk *disk, fmode_t mode)
> +{
> +       struct nbd_device *nbd_dev = disk->private_data;
> +
> +       kref_put(&nbd_dev->users,  nbd_kref_release);
> +
> +       pr_debug("Closing nbd_dev %s. Active users = %u\n",
> +                       disk->disk_name,
> +                       atomic_read(&nbd_dev->users.refcount)
> +               );
> +}
>
>  static const struct block_device_operations nbd_fops = {
>         .owner =        THIS_MODULE,
>         .ioctl =        nbd_ioctl,
>         .compat_ioctl = nbd_ioctl,
> +       .open =         nbd_open,
> +       .release =      nbd_release
>  };
>
> +
>  static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
>  {
>         struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
> -                       ws_shutdown);
> -
> -       sock_shutdown(nbd_dev);
> +                                                       ws_shutdown);
> +       struct block_device *bdev = bdget(part_devt(
> +                                               dev_to_part(nbd_to_dev(nbd_dev))
> +                                               )
> +                                       );
> +       BUG_ON(!bdev);
> +       if (nbd_dev->timedout)
> +               sock_shutdown(nbd_dev);
> +
> +       if (nbd_dev->disconnect) {
> +               mutex_lock(&nbd_dev->tx_lock);
> +               nbd_dev->task_recv = NULL;
> +               nbd_clear_que(nbd_dev);
> +               kill_bdev(bdev);
> +               nbd_bdev_reset(bdev);
> +               mutex_unlock(&nbd_dev->tx_lock);
> +               nbd_size_clear(nbd_dev, bdev);
> +               complete(&nbd_dev->user_completion);
> +       }
> +       bdput(bdev);
>  }
>
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
> --
> 1.9.1
>

This is supposed to be v3 3/3. Perhaps I gave an incorrect message id
while sending
Should I resend the series afresh [Markus?].

-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 1/3]nbd: fix might_sleep warning on socket shutdown
  2016-06-24 10:09           ` [PATCH v3 1/3]nbd: fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
@ 2016-06-28  5:42             ` Pranay Srivastava
  2016-06-29  7:18             ` Markus Pargmann
  1 sibling, 0 replies; 48+ messages in thread
From: Pranay Srivastava @ 2016-06-28  5:42 UTC (permalink / raw)
  To: Markus Pargmann, nbd-general, linux-kernel, Wouter Verhelst
  Cc: Pranay Kr. Srivastava

Hi Markus,

On Fri, Jun 24, 2016 at 3:39 PM, Pranay Kr. Srivastava
<pranjas@gmail.com> wrote:
> spinlocked ranges should be small and not contain calls into huge
> subfunctions. Fix my mistake and just get the pointer to the socket
> instead of doing everything with spinlock held.
>
> Reported-by: Mikulas Patocka <mikulas@twibright.com>
> Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
>
> Changelog:
> Pranay Kr. Srivastava<pranjas@gmail.com>:
>
> 1) Use spin_lock instead of irq version for sock_shutdown.
>
> 2) Use system work queue to actually trigger the shutdown of
>    socket. This solves the issue when kernel_sendmsg is currently
>    blocked while a timeout occurs.
>
> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
> ---
>  drivers/block/nbd.c | 69 ++++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 44 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 56f7f5d..586d946 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -39,6 +39,7 @@
>  #include <asm/types.h>
>
>  #include <linux/nbd.h>
> +#include <linux/workqueue.h>
>
>  struct nbd_device {
>         u32 flags;
> @@ -69,6 +70,10 @@ struct nbd_device {
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>         struct dentry *dbg_dir;
>  #endif
> +       /*
> +       *This is specifically for calling sock_shutdown, for now.
> +       */
> +       struct work_struct ws_shutdown;
>  };
>
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
> @@ -95,6 +100,11 @@ static int max_part;
>   */
>  static DEFINE_SPINLOCK(nbd_lock);
>
> +/*
> + * Shutdown function for nbd_dev work struct.
> + */
> +static void nbd_ws_func_shutdown(struct work_struct *);
> +
>  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
>  {
>         return disk_to_dev(nbd->disk);
> @@ -172,39 +182,35 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
>   */
>  static void sock_shutdown(struct nbd_device *nbd)
>  {
> -       spin_lock_irq(&nbd->sock_lock);
> -
> -       if (!nbd->sock) {
> -               spin_unlock_irq(&nbd->sock_lock);
> -               return;
> -       }
> +       struct socket *sock;
>
> -       dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
> -       kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
> -       sockfd_put(nbd->sock);
> +       spin_lock(&nbd->sock_lock);
> +       sock = nbd->sock;
>         nbd->sock = NULL;
> -       spin_unlock_irq(&nbd->sock_lock);
> +       spin_unlock(&nbd->sock_lock);
> +
> +       if (!sock)
> +               return;
>
>         del_timer(&nbd->timeout_timer);
> +       dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
> +       kernel_sock_shutdown(sock, SHUT_RDWR);
> +       sockfd_put(sock);
>  }
>
>  static void nbd_xmit_timeout(unsigned long arg)
>  {
>         struct nbd_device *nbd = (struct nbd_device *)arg;
> -       unsigned long flags;
>
>         if (list_empty(&nbd->queue_head))
>                 return;
> -
> -       spin_lock_irqsave(&nbd->sock_lock, flags);
> -
>         nbd->timedout = true;
> -
> -       if (nbd->sock)
> -               kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
> -
> -       spin_unlock_irqrestore(&nbd->sock_lock, flags);
> -
> +       schedule_work(&nbd->ws_shutdown);
> +       /*
> +        * Make sure sender thread sees nbd->timedout.
> +        */
> +       smp_wmb();
> +       wake_up(&nbd->waiting_wq);
>         dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
>  }
>
> @@ -574,8 +580,8 @@ static int nbd_thread_send(void *data)
>         while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
>                 /* wait for something to do */
>                 wait_event_interruptible(nbd->waiting_wq,
> -                                        kthread_should_stop() ||
> -                                        !list_empty(&nbd->waiting_queue));
> +                               kthread_should_stop() ||
> +                               !list_empty(&nbd->waiting_queue));
>
>                 /* extract request */
>                 if (list_empty(&nbd->waiting_queue))
> @@ -583,12 +589,16 @@ static int nbd_thread_send(void *data)
>
>                 spin_lock_irq(&nbd->queue_lock);
>                 req = list_entry(nbd->waiting_queue.next, struct request,
> -                                queuelist);
> +                               queuelist);
>                 list_del_init(&req->queuelist);
>                 spin_unlock_irq(&nbd->queue_lock);
>
> -               /* handle request */
>                 nbd_handle_req(nbd, req);
> +               if (nbd->timedout) {
> +                       req->errors++;
> +                       nbd_end_request(nbd, req);
> +               } else
> +                       nbd_handle_req(nbd, req);
>         }
>
>         nbd->task_send = NULL;
> @@ -668,6 +678,7 @@ static void nbd_reset(struct nbd_device *nbd)
>         set_capacity(nbd->disk, 0);
>         nbd->flags = 0;
>         nbd->xmit_timeout = 0;
> +       INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
>         queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
>         del_timer_sync(&nbd->timeout_timer);
>  }
> @@ -802,11 +813,11 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>                 error = nbd_thread_recv(nbd, bdev);
>                 nbd_dev_dbg_close(nbd);
>                 kthread_stop(thread);
> +               sock_shutdown(nbd);
>
>                 mutex_lock(&nbd->tx_lock);
>                 nbd->task_recv = NULL;
>
> -               sock_shutdown(nbd);
>                 nbd_clear_que(nbd);
>                 kill_bdev(bdev);
>                 nbd_bdev_reset(bdev);
> @@ -862,6 +873,14 @@ static const struct block_device_operations nbd_fops = {
>         .compat_ioctl = nbd_ioctl,
>  };
>
> +static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
> +{
> +       struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
> +                       ws_shutdown);
> +
> +       sock_shutdown(nbd_dev);
> +}
> +
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>
>  static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
> --
> 1.9.1
>

Do you think this series can be reviewed for 4.7?


-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 1/2] nbd: make nbd device wait for its users
  2016-06-25 17:52                 ` Pranay Srivastava
@ 2016-06-29  6:57                   ` Markus Pargmann
  0 siblings, 0 replies; 48+ messages in thread
From: Markus Pargmann @ 2016-06-29  6:57 UTC (permalink / raw)
  To: Pranay Srivastava; +Cc: nbd-general, linux-kernel, Wouter Verhelst

[-- Attachment #1: Type: text/plain, Size: 8228 bytes --]

Hi,

On Saturday 25 June 2016 23:22:06 Pranay Srivastava wrote:
> On Fri, Jun 24, 2016 at 2:59 PM, Markus Pargmann <mpa@pengutronix.de> wrote:
> > From: "Pranay Kr. Srivastava" <pranjas@gmail.com>
> >
> > When a timeout occurs or a recv fails, then instead of abruplty killing
> > nbd block device wait for it's users to finish.
> >
> > This is more required when filesystem(s) like ext2 or ext3 don't expect
> > their buffer heads to disappear while the filesystem is mounted.
> >
> > Each open is counted. The blockdevice is kept open until the last user
> > closes the block device. This offers the possibility as well to open a
> > new socket to be used while the filesystems are mounted.
> >
> > Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
> >
> > [mpa: Keep the blockdevice open until all users left]
> > Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
> > ---
> > Hi,
> >
> > I used your patch and changed it a bit based on my ideas. The general
> > difference is that this keeps the block device open. After all users left, the
> > device is reset.
> >
> > The followup patch then restricts access to ioctls after a disconnect. I wanted
> > to avoid that anyone sets up anything new without all the old users leaving.
> >
> > Please let me know what you think about this.
> >
> > Best Regards,
> >
> > Markus
> >
> >  drivers/block/nbd.c | 62 ++++++++++++++++++++++++++++++++++++++---------------
> >  1 file changed, 45 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> > index 1efc26bf1d21..620660f3ff0f 100644
> > --- a/drivers/block/nbd.c
> > +++ b/drivers/block/nbd.c
> > @@ -69,6 +69,8 @@ struct nbd_device {
> >  #if IS_ENABLED(CONFIG_DEBUG_FS)
> >         struct dentry *dbg_dir;
> >  #endif
> > +       atomic_t users; /* Users that opened the block device */
> 
> We don't need to put bdev in nbd struct. We can get it via gendisk and
> bdget/bdput.

Indeed, thanks.

> > +       struct block_device *bdev;
> >  };
> >
> >  #if IS_ENABLED(CONFIG_DEBUG_FS)
> > @@ -655,9 +657,26 @@ static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
> >         return ret;
> >  }
> >
> > +static void nbd_bdev_reset(struct block_device *bdev)
> > +{
> > +       set_device_ro(bdev, false);
> > +       bdev->bd_inode->i_size = 0;
> 
> i_size_write should be better I guess.

Yes, but that is a separate patch. This code is just moved

> 
> > +       if (max_part > 0) {
> > +               blkdev_reread_part(bdev);
> > +               bdev->bd_invalidated = 1;
> > +       }
> > +}
> > +
> >  /* Reset all properties of an NBD device */
> >  static void nbd_reset(struct nbd_device *nbd)
> >  {
> > +       sock_shutdown(nbd);
> > +       nbd_clear_que(nbd);
> > +       if (nbd->bdev) {
> > +               kill_bdev(nbd->bdev);
> > +               nbd_bdev_reset(nbd->bdev);
> > +       }
> > +
> >         nbd->disconnect = false;
> >         nbd->timedout = false;
> >         nbd->blksize = 1024;
> 
> I actually forgot to ask about this blksize. We do set_blocksize call
> for bdev, but
> shouldn't we instead do blkdev_logicial_blocksize?
> 
> Mount would set the blocksize depending on the filesystem's block size and the
> block device logical block size supported. Should we just get rid of this?

I think we should perhaps set the logical blocksize as well. But that's
a separate topic as well.

> 
> > @@ -669,16 +688,6 @@ static void nbd_reset(struct nbd_device *nbd)
> >         del_timer_sync(&nbd->timeout_timer);
> >  }
> >
> > -static void nbd_bdev_reset(struct block_device *bdev)
> > -{
> > -       set_device_ro(bdev, false);
> > -       bdev->bd_inode->i_size = 0;
> > -       if (max_part > 0) {
> > -               blkdev_reread_part(bdev);
> > -               bdev->bd_invalidated = 1;
> > -       }
> > -}
> > -
> >  static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
> >  {
> >         if (nbd->flags & NBD_FLAG_READ_ONLY)
> > @@ -803,18 +812,11 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
> 
> How about disallowing any ioctl until the nbd_device has been reset?
> Perhaps, throw error
> in open instead of checking here?

This is a comment to patch 2?!
The idea was to not allow any control over the nbd device unless some
clear instructions for a cleanup are send, for example CLEAR_SOCK. After
that nothing will work.

> 
> >                 mutex_lock(&nbd->tx_lock);
> >                 nbd->task_recv = NULL;
> >
> > -               sock_shutdown(nbd);
> > -               nbd_clear_que(nbd);
> > -               kill_bdev(bdev);
> > -               nbd_bdev_reset(bdev);
> > -
> >                 if (nbd->disconnect) /* user requested, ignore socket errors */
> >                         error = 0;
> >                 if (nbd->timedout)
> >                         error = -ETIMEDOUT;
> >
> 
> Doesn't this change the semantics for user space? NBD_DO_IT is
> supposed to be over either on error or a
> user disconnect not before that right[?].

Yes it does. But it will automatically cleanup the remaining pieces. For
example for the standard nbd-client implementation a CLEAR_SOCK is
called afterwards which will end up with the same result as with the
previous code, killing all bdev. Otherwise after all block device users
left, the cleanup is done. If someone tries to use the nbd before it was
cleaned up, it will return -EBUSY with an error message indicating the
reason.

> 
> > -               nbd_reset(nbd);
> > -
> >                 return error;
> >         }
> >
> > @@ -853,10 +855,35 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
> >         return error;
> >  }
> >
> > +static int nbd_open(struct block_device *bdev, fmode_t mode)
> > +{
> > +       struct nbd_device *nbd = bdev->bd_disk->private_data;
> > +
> > +       atomic_inc(&nbd->users);
> > +
> > +       if (!nbd->bdev)
> > +               nbd->bdev = bdev;
> > +
> > +       return 0;
> > +}
> > +
> > +static void nbd_release(struct gendisk *disk, fmode_t mode)
> > +{
> > +       struct nbd_device *nbd = disk->private_data;
> > +
> > +       if (atomic_dec_and_test(&nbd->users)) {
> > +               mutex_lock(&nbd->tx_lock);
> > +               nbd_reset(nbd);
> > +               mutex_unlock(&nbd->tx_lock);
> > +       }
> > +}
> > +
> What if the following happens,
> 
> 1) nbd-client [nbd-c1] attaches the block device
> 
> 2) mount this nbd device
> 
> 3) somebody goes crazy and does nbd-client -d <nbd_dev>
> 
> Step 3) would cause a DISCONNECT and a CLEAR_SOCK ioctl to be fired,
> but in CLEAR_SOCK there's a call to kill_bdev, so I guess that needs
> to go as well[?].

No, this is exactly what I wanted to happen. CLEAR_SOCK is an ioctl that
should ensure that the socket connection is gone.

I would prefer a client that has a "--force" option. Depending on this
option CLEAR_SOCK is called or not. This way the user can decide what
happens on disconnect or when the nbd device is still busy.

Best Regards,

Markus

> 
> >  static const struct block_device_operations nbd_fops = {
> >         .owner =        THIS_MODULE,
> >         .ioctl =        nbd_ioctl,
> >         .compat_ioctl = nbd_ioctl,
> > +       .open =         nbd_open,
> > +       .release =      nbd_release,
> >  };
> >
> >  #if IS_ENABLED(CONFIG_DEBUG_FS)
> > @@ -1087,6 +1114,7 @@ static int __init nbd_init(void)
> >                 disk->private_data = &nbd_dev[i];
> >                 sprintf(disk->disk_name, "nbd%d", i);
> >                 nbd_reset(&nbd_dev[i]);
> > +               atomic_set(&nbd_dev[i].users, 0);
> >                 add_disk(disk);
> >         }
> >
> > --
> > 2.1.4
> >
> 
> 

-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 3/3]nbd: make nbd device wait for its users
  2016-06-24 10:09           ` [PATCH 3/3]nbd: make nbd device wait for its users Pranay Kr. Srivastava
  2016-06-24 13:42             ` [Nbd] " Eric Blake
  2016-06-25 18:01             ` Pranay Srivastava
@ 2016-06-29  7:06             ` Markus Pargmann
  2016-06-29  7:15               ` Pranay Srivastava
  2 siblings, 1 reply; 48+ messages in thread
From: Markus Pargmann @ 2016-06-29  7:06 UTC (permalink / raw)
  To: Pranay Kr. Srivastava; +Cc: nbd-general, linux-kernel, w

[-- Attachment #1: Type: text/plain, Size: 10266 bytes --]

Hi,

On Friday 24 June 2016 13:09:36 Pranay Kr. Srivastava wrote:
> When a timeout occurs or a recv fails, then
> instead of abruplty killing nbd block device
> wait for it's users to finish.
> 
> This is more required when filesystem(s) like
> ext2 or ext3 don't expect their buffer heads to
> disappear while the filesystem is mounted.
> 
> Each open of a nbd device is refcounted, while
> the userland program [nbd-client] doing the
> NBD_DO_IT ioctl would now wait for any other users
> of this device before invalidating the nbd device.
> 
> A timedout or a disconnected device, if in use, can't
> be used until it has been resetted. The resetting happens
> when all tasks having this bdev open closes this bdev.

Sorry, but this patch is unreadable. You are changing so many unrelated
whitespaces, lines, comments (that you introduced yourself in a previous
patch) and unrelated code. Please keep only the things that are
necessary for this single patch. Everything else can go into different
patches. Also it would be good to run checkpatch sometimes.

Also using your own atomic implementation instead of kref would be good
perhaps. Although I thought kref would work at the beginning but it
seems not to.

Best Regards,

Markus

> 
> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
> ---
>  drivers/block/nbd.c | 124 ++++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 96 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 9223b09..0587bbd 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -70,10 +70,13 @@ struct nbd_device {
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>  	struct dentry *dbg_dir;
>  #endif
> +
>  	/*
> -	*This is specifically for calling sock_shutdown, for now.
> -	*/
> +	 *This is specifically for calling sock_shutdown, for now.
> +	 */
>  	struct work_struct ws_shutdown;
> +	struct kref users;
> +	struct completion user_completion;
>  };
>  
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
> @@ -104,6 +107,8 @@ static DEFINE_SPINLOCK(nbd_lock);
>   * Shutdown function for nbd_dev work struct.
>   */
>  static void nbd_ws_func_shutdown(struct work_struct *);
> +static void nbd_kref_release(struct kref *);
> +static int nbd_size_clear(struct nbd_device *, struct block_device *);
>  
>  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
>  {
> @@ -129,7 +134,7 @@ static const char *nbdcmd_to_ascii(int cmd)
>  
>  static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
>  {
> -	bdev->bd_inode->i_size = 0;
> +	i_size_write(bdev->bd_inode, 0);
>  	set_capacity(nbd->disk, 0);
>  	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
>  
> @@ -141,7 +146,7 @@ static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
>  	if (!nbd_is_connected(nbd))
>  		return;
>  
> -	bdev->bd_inode->i_size = nbd->bytesize;
> +	i_size_write(bdev->bd_inode, nbd->bytesize);
>  	set_capacity(nbd->disk, nbd->bytesize >> 9);
>  	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
>  }
> @@ -150,11 +155,9 @@ static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
>  			int blocksize, int nr_blocks)
>  {
>  	int ret;
> -
>  	ret = set_blocksize(bdev, blocksize);
>  	if (ret)
>  		return ret;
> -
>  	nbd->blksize = blocksize;
>  	nbd->bytesize = (loff_t)blocksize * (loff_t)nr_blocks;
>  
> @@ -202,14 +205,19 @@ static void nbd_xmit_timeout(unsigned long arg)
>  {
>  	struct nbd_device *nbd = (struct nbd_device *)arg;
>  
> +	if (nbd->timedout)
> +		return;
> +
>  	if (list_empty(&nbd->queue_head))
>  		return;
> +
>  	nbd->timedout = true;
> -	schedule_work(&nbd->ws_shutdown);
> +
>  	/*
>  	 * Make sure sender thread sees nbd->timedout.
>  	 */
>  	smp_wmb();
> +	schedule_work(&nbd->ws_shutdown);
>  	wake_up(&nbd->waiting_wq);
>  	dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
>  }
> @@ -476,8 +484,6 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
>  		nbd_end_request(nbd, req);
>  	}
>  
> -	nbd_size_clear(nbd, bdev);
> -
>  	device_remove_file(disk_to_dev(nbd->disk), &dev_attr_pid);
>  
>  	nbd->task_recv = NULL;
> @@ -580,8 +586,8 @@ static int nbd_thread_send(void *data)
>  	while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
>  		/* wait for something to do */
>  		wait_event_interruptible(nbd->waiting_wq,
> -				kthread_should_stop() ||
> -				!list_empty(&nbd->waiting_queue));
> +					 kthread_should_stop() ||
> +					 !list_empty(&nbd->waiting_queue));
>  
>  		/* extract request */
>  		if (list_empty(&nbd->waiting_queue))
> @@ -589,11 +595,11 @@ static int nbd_thread_send(void *data)
>  
>  		spin_lock_irq(&nbd->queue_lock);
>  		req = list_entry(nbd->waiting_queue.next, struct request,
> -				queuelist);
> +				 queuelist);
>  		list_del_init(&req->queuelist);
>  		spin_unlock_irq(&nbd->queue_lock);
>  
> -		nbd_handle_req(nbd, req);
> +		/* handle request */
>  		if (nbd->timedout) {
>  			req->errors++;
>  			nbd_end_request(nbd, req);
> @@ -654,12 +660,13 @@ static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
>  	int ret = 0;
>  
>  	spin_lock(&nbd->sock_lock);
> -	if (nbd->sock)
> +
> +	if (nbd->sock || nbd->timedout)
>  		ret = -EBUSY;
>  	else
>  		nbd->sock = sock;
> -	spin_unlock(&nbd->sock_lock);
>  
> +	spin_unlock(&nbd->sock_lock);
>  	return ret;
>  }
>  
> @@ -674,6 +681,7 @@ static void nbd_reset(struct nbd_device *nbd)
>  	nbd->flags = 0;
>  	nbd->xmit_timeout = 0;
>  	INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
> +	init_completion(&nbd->user_completion);
>  	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
>  	del_timer_sync(&nbd->timeout_timer);
>  }
> @@ -708,6 +716,9 @@ static void nbd_dev_dbg_close(struct nbd_device *nbd);
>  static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>  		       unsigned int cmd, unsigned long arg)
>  {
> +	if (nbd->timedout || nbd->disconnect)
> +		return -EBUSY;
> +
>  	switch (cmd) {
>  	case NBD_DISCONNECT: {
>  		struct request sreq;
> @@ -737,7 +748,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>  		nbd_clear_que(nbd);
>  		BUG_ON(!list_empty(&nbd->queue_head));
>  		BUG_ON(!list_empty(&nbd->waiting_queue));
> -		kill_bdev(bdev);
>  		return 0;
>  
>  	case NBD_SET_SOCK: {
> @@ -756,7 +766,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>  
>  	case NBD_SET_BLKSIZE: {
>  		loff_t bsize = div_s64(nbd->bytesize, arg);
> -
>  		return nbd_size_set(nbd, bdev, arg, bsize);
>  	}
>  
> @@ -808,22 +817,29 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>  		error = nbd_thread_recv(nbd, bdev);
>  		nbd_dev_dbg_close(nbd);
>  		kthread_stop(thread);
> -		sock_shutdown(nbd);
>  
> -		mutex_lock(&nbd->tx_lock);
> -		nbd->task_recv = NULL;
> -
> -		nbd_clear_que(nbd);
> -		kill_bdev(bdev);
> -		nbd_bdev_reset(bdev);
> +		sock_shutdown(nbd);
>  
>  		if (nbd->disconnect) /* user requested, ignore socket errors */
>  			error = 0;
>  		if (nbd->timedout)
>  			error = -ETIMEDOUT;
>  
> -		nbd_reset(nbd);
> +		mutex_lock(&nbd->tx_lock);
> +		nbd_clear_que(nbd);
> +		nbd->disconnect = true; /* To kill bdev*/
> +		mutex_unlock(&nbd->tx_lock);
> +		cancel_work_sync(&nbd->ws_shutdown);
> +		kref_put(&nbd->users, nbd_kref_release);
> +		wait_for_completion(&nbd->user_completion);
>  
> +		mutex_lock(&bdev->bd_mutex);
> +		if (!kref_get_unless_zero(&nbd->users))
> +			kref_init(&nbd->users);
> +		mutex_unlock(&bdev->bd_mutex);
> +
> +		mutex_lock(&nbd->tx_lock);
> +		nbd_reset(nbd);
>  		return error;
>  	}
>  
> @@ -861,19 +877,71 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
>  
>  	return error;
>  }
> +static void nbd_kref_release(struct kref *kref_users)
> +{
> +	struct nbd_device *nbd = container_of(kref_users, struct nbd_device,
> +			users);
> +	schedule_work(&nbd->ws_shutdown);
> +}
> +
> +static int nbd_open(struct block_device *bdev, fmode_t mode)
> +{
> +	struct nbd_device *nbd_dev = bdev->bd_disk->private_data;
> +
> +	if (!kref_get_unless_zero(&nbd_dev->users))
> +		kref_init(&nbd_dev->users);
> +
> +	pr_debug("Opening nbd_dev %s. Active users = %u\n",
> +			bdev->bd_disk->disk_name,
> +			atomic_read(&nbd_dev->users.refcount)
> +		);
> +	return 0;
> +}
> +
> +static void nbd_release(struct gendisk *disk, fmode_t mode)
> +{
> +	struct nbd_device *nbd_dev = disk->private_data;
> +
> +	kref_put(&nbd_dev->users,  nbd_kref_release);
> +
> +	pr_debug("Closing nbd_dev %s. Active users = %u\n",
> +			disk->disk_name,
> +			atomic_read(&nbd_dev->users.refcount)
> +		);
> +}
>  
>  static const struct block_device_operations nbd_fops = {
>  	.owner =	THIS_MODULE,
>  	.ioctl =	nbd_ioctl,
>  	.compat_ioctl =	nbd_ioctl,
> +	.open =		nbd_open,
> +	.release =	nbd_release
>  };
>  
> +
>  static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
>  {
>  	struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
> -			ws_shutdown);
> -
> -	sock_shutdown(nbd_dev);
> +							ws_shutdown);
> +	struct block_device *bdev = bdget(part_devt(
> +						dev_to_part(nbd_to_dev(nbd_dev))
> +						)
> +					);
> +	BUG_ON(!bdev);
> +	if (nbd_dev->timedout)
> +		sock_shutdown(nbd_dev);
> +
> +	if (nbd_dev->disconnect) {
> +		mutex_lock(&nbd_dev->tx_lock);
> +		nbd_dev->task_recv = NULL;
> +		nbd_clear_que(nbd_dev);
> +		kill_bdev(bdev);
> +		nbd_bdev_reset(bdev);
> +		mutex_unlock(&nbd_dev->tx_lock);
> +		nbd_size_clear(nbd_dev, bdev);
> +		complete(&nbd_dev->user_completion);
> +	}
> +	bdput(bdev);
>  }
>  
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
> 

-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 3/3]nbd: make nbd device wait for its users
  2016-06-29  7:06             ` Markus Pargmann
@ 2016-06-29  7:15               ` Pranay Srivastava
  0 siblings, 0 replies; 48+ messages in thread
From: Pranay Srivastava @ 2016-06-29  7:15 UTC (permalink / raw)
  To: Markus Pargmann; +Cc: nbd-general, linux-kernel, Wouter Verhelst

Hi

On Wed, Jun 29, 2016 at 12:36 PM, Markus Pargmann <mpa@pengutronix.de> wrote:
> Hi,
>
> On Friday 24 June 2016 13:09:36 Pranay Kr. Srivastava wrote:
>> When a timeout occurs or a recv fails, then
>> instead of abruplty killing nbd block device
>> wait for it's users to finish.
>>
>> This is more required when filesystem(s) like
>> ext2 or ext3 don't expect their buffer heads to
>> disappear while the filesystem is mounted.
>>
>> Each open of a nbd device is refcounted, while
>> the userland program [nbd-client] doing the
>> NBD_DO_IT ioctl would now wait for any other users
>> of this device before invalidating the nbd device.
>>
>> A timedout or a disconnected device, if in use, can't
>> be used until it has been resetted. The resetting happens
>> when all tasks having this bdev open closes this bdev.
>
> Sorry, but this patch is unreadable. You are changing so many unrelated
> whitespaces, lines, comments (that you introduced yourself in a previous
> patch) and unrelated code. Please keep only the things that are

Yes you are right.

> necessary for this single patch. Everything else can go into different
> patches. Also it would be good to run checkpatch sometimes.
>

> Also using your own atomic implementation instead of kref would be good
> perhaps. Although I thought kref would work at the beginning but it
> seems not to.

I was able to make it work and it turned out to be simpler.
Do you think you can go over this patch again just for the kref part?
Should I resend this patch again?

>
> Best Regards,
>
> Markus
>
>>
>> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
>> ---
>>  drivers/block/nbd.c | 124 ++++++++++++++++++++++++++++++++++++++++------------
>>  1 file changed, 96 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
>> index 9223b09..0587bbd 100644
>> --- a/drivers/block/nbd.c
>> +++ b/drivers/block/nbd.c
>> @@ -70,10 +70,13 @@ struct nbd_device {
>>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>>       struct dentry *dbg_dir;
>>  #endif
>> +
>>       /*
>> -     *This is specifically for calling sock_shutdown, for now.
>> -     */
>> +      *This is specifically for calling sock_shutdown, for now.
>> +      */
>>       struct work_struct ws_shutdown;
>> +     struct kref users;
>> +     struct completion user_completion;
>>  };
>>
>>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>> @@ -104,6 +107,8 @@ static DEFINE_SPINLOCK(nbd_lock);
>>   * Shutdown function for nbd_dev work struct.
>>   */
>>  static void nbd_ws_func_shutdown(struct work_struct *);
>> +static void nbd_kref_release(struct kref *);
>> +static int nbd_size_clear(struct nbd_device *, struct block_device *);
>>
>>  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
>>  {
>> @@ -129,7 +134,7 @@ static const char *nbdcmd_to_ascii(int cmd)
>>
>>  static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
>>  {
>> -     bdev->bd_inode->i_size = 0;
>> +     i_size_write(bdev->bd_inode, 0);
>>       set_capacity(nbd->disk, 0);
>>       kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
>>
>> @@ -141,7 +146,7 @@ static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
>>       if (!nbd_is_connected(nbd))
>>               return;
>>
>> -     bdev->bd_inode->i_size = nbd->bytesize;
>> +     i_size_write(bdev->bd_inode, nbd->bytesize);
>>       set_capacity(nbd->disk, nbd->bytesize >> 9);
>>       kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
>>  }
>> @@ -150,11 +155,9 @@ static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
>>                       int blocksize, int nr_blocks)
>>  {
>>       int ret;
>> -
>>       ret = set_blocksize(bdev, blocksize);
>>       if (ret)
>>               return ret;
>> -
>>       nbd->blksize = blocksize;
>>       nbd->bytesize = (loff_t)blocksize * (loff_t)nr_blocks;
>>
>> @@ -202,14 +205,19 @@ static void nbd_xmit_timeout(unsigned long arg)
>>  {
>>       struct nbd_device *nbd = (struct nbd_device *)arg;
>>
>> +     if (nbd->timedout)
>> +             return;
>> +
>>       if (list_empty(&nbd->queue_head))
>>               return;
>> +
>>       nbd->timedout = true;
>> -     schedule_work(&nbd->ws_shutdown);
>> +
>>       /*
>>        * Make sure sender thread sees nbd->timedout.
>>        */
>>       smp_wmb();
>> +     schedule_work(&nbd->ws_shutdown);
>>       wake_up(&nbd->waiting_wq);
>>       dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
>>  }
>> @@ -476,8 +484,6 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
>>               nbd_end_request(nbd, req);
>>       }
>>
>> -     nbd_size_clear(nbd, bdev);
>> -
>>       device_remove_file(disk_to_dev(nbd->disk), &dev_attr_pid);
>>
>>       nbd->task_recv = NULL;
>> @@ -580,8 +586,8 @@ static int nbd_thread_send(void *data)
>>       while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
>>               /* wait for something to do */
>>               wait_event_interruptible(nbd->waiting_wq,
>> -                             kthread_should_stop() ||
>> -                             !list_empty(&nbd->waiting_queue));
>> +                                      kthread_should_stop() ||
>> +                                      !list_empty(&nbd->waiting_queue));
>>
>>               /* extract request */
>>               if (list_empty(&nbd->waiting_queue))
>> @@ -589,11 +595,11 @@ static int nbd_thread_send(void *data)
>>
>>               spin_lock_irq(&nbd->queue_lock);
>>               req = list_entry(nbd->waiting_queue.next, struct request,
>> -                             queuelist);
>> +                              queuelist);
>>               list_del_init(&req->queuelist);
>>               spin_unlock_irq(&nbd->queue_lock);
>>
>> -             nbd_handle_req(nbd, req);
>> +             /* handle request */
>>               if (nbd->timedout) {
>>                       req->errors++;
>>                       nbd_end_request(nbd, req);
>> @@ -654,12 +660,13 @@ static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
>>       int ret = 0;
>>
>>       spin_lock(&nbd->sock_lock);
>> -     if (nbd->sock)
>> +
>> +     if (nbd->sock || nbd->timedout)
>>               ret = -EBUSY;
>>       else
>>               nbd->sock = sock;
>> -     spin_unlock(&nbd->sock_lock);
>>
>> +     spin_unlock(&nbd->sock_lock);
>>       return ret;
>>  }
>>
>> @@ -674,6 +681,7 @@ static void nbd_reset(struct nbd_device *nbd)
>>       nbd->flags = 0;
>>       nbd->xmit_timeout = 0;
>>       INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
>> +     init_completion(&nbd->user_completion);
>>       queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
>>       del_timer_sync(&nbd->timeout_timer);
>>  }
>> @@ -708,6 +716,9 @@ static void nbd_dev_dbg_close(struct nbd_device *nbd);
>>  static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>>                      unsigned int cmd, unsigned long arg)
>>  {
>> +     if (nbd->timedout || nbd->disconnect)
>> +             return -EBUSY;
>> +
>>       switch (cmd) {
>>       case NBD_DISCONNECT: {
>>               struct request sreq;
>> @@ -737,7 +748,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>>               nbd_clear_que(nbd);
>>               BUG_ON(!list_empty(&nbd->queue_head));
>>               BUG_ON(!list_empty(&nbd->waiting_queue));
>> -             kill_bdev(bdev);
>>               return 0;
>>
>>       case NBD_SET_SOCK: {
>> @@ -756,7 +766,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>>
>>       case NBD_SET_BLKSIZE: {
>>               loff_t bsize = div_s64(nbd->bytesize, arg);
>> -
>>               return nbd_size_set(nbd, bdev, arg, bsize);
>>       }
>>
>> @@ -808,22 +817,29 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>>               error = nbd_thread_recv(nbd, bdev);
>>               nbd_dev_dbg_close(nbd);
>>               kthread_stop(thread);
>> -             sock_shutdown(nbd);
>>
>> -             mutex_lock(&nbd->tx_lock);
>> -             nbd->task_recv = NULL;
>> -
>> -             nbd_clear_que(nbd);
>> -             kill_bdev(bdev);
>> -             nbd_bdev_reset(bdev);
>> +             sock_shutdown(nbd);
>>
>>               if (nbd->disconnect) /* user requested, ignore socket errors */
>>                       error = 0;
>>               if (nbd->timedout)
>>                       error = -ETIMEDOUT;
>>
>> -             nbd_reset(nbd);
>> +             mutex_lock(&nbd->tx_lock);
>> +             nbd_clear_que(nbd);
>> +             nbd->disconnect = true; /* To kill bdev*/
>> +             mutex_unlock(&nbd->tx_lock);
>> +             cancel_work_sync(&nbd->ws_shutdown);
>> +             kref_put(&nbd->users, nbd_kref_release);
>> +             wait_for_completion(&nbd->user_completion);
>>
>> +             mutex_lock(&bdev->bd_mutex);
>> +             if (!kref_get_unless_zero(&nbd->users))
>> +                     kref_init(&nbd->users);
>> +             mutex_unlock(&bdev->bd_mutex);
>> +
>> +             mutex_lock(&nbd->tx_lock);
>> +             nbd_reset(nbd);
>>               return error;
>>       }
>>
>> @@ -861,19 +877,71 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
>>
>>       return error;
>>  }
>> +static void nbd_kref_release(struct kref *kref_users)
>> +{
>> +     struct nbd_device *nbd = container_of(kref_users, struct nbd_device,
>> +                     users);
>> +     schedule_work(&nbd->ws_shutdown);
>> +}
>> +
>> +static int nbd_open(struct block_device *bdev, fmode_t mode)
>> +{
>> +     struct nbd_device *nbd_dev = bdev->bd_disk->private_data;
>> +
>> +     if (!kref_get_unless_zero(&nbd_dev->users))
>> +             kref_init(&nbd_dev->users);
>> +
>> +     pr_debug("Opening nbd_dev %s. Active users = %u\n",
>> +                     bdev->bd_disk->disk_name,
>> +                     atomic_read(&nbd_dev->users.refcount)
>> +             );
>> +     return 0;
>> +}
>> +
>> +static void nbd_release(struct gendisk *disk, fmode_t mode)
>> +{
>> +     struct nbd_device *nbd_dev = disk->private_data;
>> +
>> +     kref_put(&nbd_dev->users,  nbd_kref_release);
>> +
>> +     pr_debug("Closing nbd_dev %s. Active users = %u\n",
>> +                     disk->disk_name,
>> +                     atomic_read(&nbd_dev->users.refcount)
>> +             );
>> +}
>>
>>  static const struct block_device_operations nbd_fops = {
>>       .owner =        THIS_MODULE,
>>       .ioctl =        nbd_ioctl,
>>       .compat_ioctl = nbd_ioctl,
>> +     .open =         nbd_open,
>> +     .release =      nbd_release
>>  };
>>
>> +
>>  static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
>>  {
>>       struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
>> -                     ws_shutdown);
>> -
>> -     sock_shutdown(nbd_dev);
>> +                                                     ws_shutdown);
>> +     struct block_device *bdev = bdget(part_devt(
>> +                                             dev_to_part(nbd_to_dev(nbd_dev))
>> +                                             )
>> +                                     );
>> +     BUG_ON(!bdev);
>> +     if (nbd_dev->timedout)
>> +             sock_shutdown(nbd_dev);
>> +
>> +     if (nbd_dev->disconnect) {
>> +             mutex_lock(&nbd_dev->tx_lock);
>> +             nbd_dev->task_recv = NULL;
>> +             nbd_clear_que(nbd_dev);
>> +             kill_bdev(bdev);
>> +             nbd_bdev_reset(bdev);
>> +             mutex_unlock(&nbd_dev->tx_lock);
>> +             nbd_size_clear(nbd_dev, bdev);
>> +             complete(&nbd_dev->user_completion);
>> +     }
>> +     bdput(bdev);
>>  }
>>
>>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>>
>
> --
> Pengutronix e.K.                           |                             |
> Industrial Linux Solutions                 | http://www.pengutronix.de/  |
> Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
> Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |



-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 1/3]nbd: fix might_sleep warning on socket shutdown
  2016-06-24 10:09           ` [PATCH v3 1/3]nbd: fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
  2016-06-28  5:42             ` Pranay Srivastava
@ 2016-06-29  7:18             ` Markus Pargmann
  1 sibling, 0 replies; 48+ messages in thread
From: Markus Pargmann @ 2016-06-29  7:18 UTC (permalink / raw)
  To: Pranay Kr. Srivastava; +Cc: nbd-general, linux-kernel, w

[-- Attachment #1: Type: text/plain, Size: 6278 bytes --]

On Friday 24 June 2016 13:09:34 Pranay Kr. Srivastava wrote:
> spinlocked ranges should be small and not contain calls into huge
> subfunctions. Fix my mistake and just get the pointer to the socket
> instead of doing everything with spinlock held.
> 
> Reported-by: Mikulas Patocka <mikulas@twibright.com>
> Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
> 
> Changelog:
> Pranay Kr. Srivastava<pranjas@gmail.com>:
> 
> 1) Use spin_lock instead of irq version for sock_shutdown.
> 
> 2) Use system work queue to actually trigger the shutdown of
>    socket. This solves the issue when kernel_sendmsg is currently
>    blocked while a timeout occurs.
> 
> Signed-off-by: Pranay Kr. Srivastava <pranjas@gmail.com>
> ---
>  drivers/block/nbd.c | 69 ++++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 44 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 56f7f5d..586d946 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -39,6 +39,7 @@
>  #include <asm/types.h>
>  
>  #include <linux/nbd.h>
> +#include <linux/workqueue.h>
>  
>  struct nbd_device {
>  	u32 flags;
> @@ -69,6 +70,10 @@ struct nbd_device {
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>  	struct dentry *dbg_dir;
>  #endif
> +	/*
> +	*This is specifically for calling sock_shutdown, for now.
> +	*/

Please fix the indentation of this comment.

> +	struct work_struct ws_shutdown;
>  };
>  
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
> @@ -95,6 +100,11 @@ static int max_part;
>   */
>  static DEFINE_SPINLOCK(nbd_lock);
>  
> +/*
> + * Shutdown function for nbd_dev work struct.
> + */
> +static void nbd_ws_func_shutdown(struct work_struct *);

You could as well put the function implementation here. No need for a
function signature.

> +
>  static inline struct device *nbd_to_dev(struct nbd_device *nbd)
>  {
>  	return disk_to_dev(nbd->disk);
> @@ -172,39 +182,35 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
>   */
>  static void sock_shutdown(struct nbd_device *nbd)
>  {
> -	spin_lock_irq(&nbd->sock_lock);
> -
> -	if (!nbd->sock) {
> -		spin_unlock_irq(&nbd->sock_lock);
> -		return;
> -	}
> +	struct socket *sock;
>  
> -	dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
> -	kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
> -	sockfd_put(nbd->sock);
> +	spin_lock(&nbd->sock_lock);
> +	sock = nbd->sock;
>  	nbd->sock = NULL;
> -	spin_unlock_irq(&nbd->sock_lock);
> +	spin_unlock(&nbd->sock_lock);
> +
> +	if (!sock)
> +		return;
>  
>  	del_timer(&nbd->timeout_timer);
> +	dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
> +	kernel_sock_shutdown(sock, SHUT_RDWR);
> +	sockfd_put(sock);
>  }
>  
>  static void nbd_xmit_timeout(unsigned long arg)
>  {
>  	struct nbd_device *nbd = (struct nbd_device *)arg;
> -	unsigned long flags;
>  
>  	if (list_empty(&nbd->queue_head))
>  		return;
> -
> -	spin_lock_irqsave(&nbd->sock_lock, flags);
> -
>  	nbd->timedout = true;
> -
> -	if (nbd->sock)
> -		kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
> -
> -	spin_unlock_irqrestore(&nbd->sock_lock, flags);
> -
> +	schedule_work(&nbd->ws_shutdown);
> +	/*
> +	 * Make sure sender thread sees nbd->timedout.
> +	 */
> +	smp_wmb();
> +	wake_up(&nbd->waiting_wq);
>  	dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
>  }
>  
> @@ -574,8 +580,8 @@ static int nbd_thread_send(void *data)
>  	while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
>  		/* wait for something to do */
>  		wait_event_interruptible(nbd->waiting_wq,
> -					 kthread_should_stop() ||
> -					 !list_empty(&nbd->waiting_queue));
> +				kthread_should_stop() ||
> +				!list_empty(&nbd->waiting_queue));

This is unrelated, please remove.

>  
>  		/* extract request */
>  		if (list_empty(&nbd->waiting_queue))
> @@ -583,12 +589,16 @@ static int nbd_thread_send(void *data)
>  
>  		spin_lock_irq(&nbd->queue_lock);
>  		req = list_entry(nbd->waiting_queue.next, struct request,
> -				 queuelist);
> +				queuelist);

Unrelated as well.

>  		list_del_init(&req->queuelist);
>  		spin_unlock_irq(&nbd->queue_lock);
>  
> -		/* handle request */

Unrelated.

>  		nbd_handle_req(nbd, req);
> +		if (nbd->timedout) {
> +			req->errors++;
> +			nbd_end_request(nbd, req);
> +		} else
> +			nbd_handle_req(nbd, req);

This does not change anything to avoid the spinlock sock_shutdown issue.
Please split in a separate patch.

Also this is already handled in nbd_handle_req(). For !nbd->sock the
same code is executed.


Otherwise the patch looks good.

Best Regards,

Markus

>  	}
>  
>  	nbd->task_send = NULL;
> @@ -668,6 +678,7 @@ static void nbd_reset(struct nbd_device *nbd)
>  	set_capacity(nbd->disk, 0);
>  	nbd->flags = 0;
>  	nbd->xmit_timeout = 0;
> +	INIT_WORK(&nbd->ws_shutdown, nbd_ws_func_shutdown);
>  	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
>  	del_timer_sync(&nbd->timeout_timer);
>  }
> @@ -802,11 +813,11 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>  		error = nbd_thread_recv(nbd, bdev);
>  		nbd_dev_dbg_close(nbd);
>  		kthread_stop(thread);
> +		sock_shutdown(nbd);
>  
>  		mutex_lock(&nbd->tx_lock);
>  		nbd->task_recv = NULL;
>  
> -		sock_shutdown(nbd);
>  		nbd_clear_que(nbd);
>  		kill_bdev(bdev);
>  		nbd_bdev_reset(bdev);
> @@ -862,6 +873,14 @@ static const struct block_device_operations nbd_fops = {
>  	.compat_ioctl =	nbd_ioctl,
>  };
>  
> +static void nbd_ws_func_shutdown(struct work_struct *ws_nbd)
> +{
> +	struct nbd_device *nbd_dev = container_of(ws_nbd, struct nbd_device,
> +			ws_shutdown);
> +
> +	sock_shutdown(nbd_dev);
> +}
> +
>  #if IS_ENABLED(CONFIG_DEBUG_FS)
>  
>  static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
> 

-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 2/2] nbd: Disallow ioctls on disconnected block device
  2016-06-24  9:29                 ` [PATCH 2/2] nbd: Disallow ioctls on disconnected block device Markus Pargmann
@ 2016-07-16  7:42                   ` Pranay Srivastava
  2016-07-16  9:32                     ` [Nbd] " Alex Bligh
  0 siblings, 1 reply; 48+ messages in thread
From: Pranay Srivastava @ 2016-07-16  7:42 UTC (permalink / raw)
  To: Markus Pargmann; +Cc: nbd-general, linux-kernel, Wouter Verhelst

Hi,

On Fri, Jun 24, 2016 at 2:59 PM, Markus Pargmann <mpa@pengutronix.de> wrote:
> After NBD_DO_IT exited the block device may still be used. Make sure
> that we handle intended disconnects differently and do not allow any
> changed of the nbd device.
>
> This patch should avoid that the nbd-client connects to a different server
> and the users of the block device are suddenly reading/writing from a
> different backend device.
>
> For timeouts it is still possible to setup a new socket so that the
> connection may be refreshed without creating problems for all users.

But Shouldn't time out be checked for last end point?

For example, consider the following steps

1) Timeout occurs but server[nbd-s1] comes up again albeit with a different
    network address.

2) The previous network address of server [nbd-s1] has now been assigned to
    another new nbd server [nbd-s2]

3) A new nbd-client tries to setup the socket again, Negotiation would
be done again
    [correct?]. If correct then wouldn't we be sending data to wrong
device this time?

So instead can't we put a mechanism in place for network address + mac
to be same
for allowing clients to reconnect? Do let me know if this is not of concern.

4) If 3) doesn't apply then let's disallow all ioctls until nbd device
is reset.

>
> Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
> ---
>  drivers/block/nbd.c | 30 ++++++++++++++++++++++++------
>  1 file changed, 24 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 620660f3ff0f..39358efac73e 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -708,6 +708,18 @@ static void nbd_dev_dbg_close(struct nbd_device *nbd);
>  static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>                        unsigned int cmd, unsigned long arg)
>  {
> +       /*
> +        * After a disconnect was instructed, do not allow any further actions
> +        * on the block device that would lead to a new connected endpoint.
> +        * This condition stays until nbd_reset was called either because all
> +        * users closed the device or because of CLEAR_SOCK.
> +        */
> +       if (nbd->disconnect &&
> +           cmd != NBD_CLEAR_SOCK && cmd != NBD_PRINT_DEBUG) {
> +               dev_info(disk_to_dev(nbd->disk), "Device is still busy after instructing a disconnect\n");
> +               return -EBUSY;
> +       }
> +
>         switch (cmd) {
>         case NBD_DISCONNECT: {
>                 struct request sreq;
> @@ -733,11 +745,15 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>         }
>
>         case NBD_CLEAR_SOCK:
> -               sock_shutdown(nbd);
> -               nbd_clear_que(nbd);
> -               BUG_ON(!list_empty(&nbd->queue_head));
> -               BUG_ON(!list_empty(&nbd->waiting_queue));
> -               kill_bdev(bdev);
> +               if (nbd->disconnect) {
> +                       nbd_reset(nbd);
> +               } else {
> +                       sock_shutdown(nbd);
> +                       nbd_clear_que(nbd);
> +                       BUG_ON(!list_empty(&nbd->queue_head));
> +                       BUG_ON(!list_empty(&nbd->waiting_queue));
> +                       kill_bdev(bdev);
> +               }
>                 return 0;
>
>         case NBD_SET_SOCK: {
> @@ -812,8 +828,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
>                 mutex_lock(&nbd->tx_lock);
>                 nbd->task_recv = NULL;
>
> -               if (nbd->disconnect) /* user requested, ignore socket errors */
> +               if (nbd->disconnect) { /* user requested, ignore socket errors */
> +                       sock_shutdown(nbd);
>                         error = 0;
> +               }
>                 if (nbd->timedout)
>                         error = -ETIMEDOUT;
>
> --
> 2.1.4
>



-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Nbd] [PATCH 2/2] nbd: Disallow ioctls on disconnected block device
  2016-07-16  7:42                   ` Pranay Srivastava
@ 2016-07-16  9:32                     ` Alex Bligh
  2016-07-16 10:08                       ` Pranay Srivastava
  0 siblings, 1 reply; 48+ messages in thread
From: Alex Bligh @ 2016-07-16  9:32 UTC (permalink / raw)
  To: Pranay Srivastava
  Cc: Alex Bligh, Markus Pargmann, nbd-general, Wouter Verhelst, linux-kernel


On 16 Jul 2016, at 08:42, Pranay Srivastava <pranjas@gmail.com> wrote:

> So instead can't we put a mechanism in place for network address + mac
> to be same
> for allowing clients to reconnect? Do let me know if this is not of concern.

MAC address?! nbd clients connect over IP, and if a router reboots
between them, you could easily see two packets from the same client
come from different MAC addresses. Similarly all clients not on
the same L2 network will carry the same MAC address. So MAC address
is a very poor indicator of 'same client'.

IP address is also a poor indicator (think NAT) but is substantially
less bad.

-- 
Alex Bligh

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Nbd] [PATCH 2/2] nbd: Disallow ioctls on disconnected block device
  2016-07-16  9:32                     ` [Nbd] " Alex Bligh
@ 2016-07-16 10:08                       ` Pranay Srivastava
  2016-07-16 11:26                         ` Wouter Verhelst
  0 siblings, 1 reply; 48+ messages in thread
From: Pranay Srivastava @ 2016-07-16 10:08 UTC (permalink / raw)
  To: Alex Bligh; +Cc: Markus Pargmann, nbd-general, Wouter Verhelst, linux-kernel

On Sat, Jul 16, 2016 at 3:02 PM, Alex Bligh <alex@alex.org.uk> wrote:
>
> On 16 Jul 2016, at 08:42, Pranay Srivastava <pranjas@gmail.com> wrote:
>
>> So instead can't we put a mechanism in place for network address + mac
>> to be same
>> for allowing clients to reconnect? Do let me know if this is not of concern.
>
> MAC address?! nbd clients connect over IP, and if a router reboots
> between them, you could easily see two packets from the same client
> come from different MAC addresses. Similarly all clients not on
> the same L2 network will carry the same MAC address. So MAC address
> is a very poor indicator of 'same client'.
>
> IP address is also a poor indicator (think NAT) but is substantially
> less bad.

Okay. So how about we include some negotiated key which goes in with every
request which the server could maintain for clients that can be checked while
resetting the connection with the same server?

So am I correct that this situation can
indeed happen or the server will throw an error back to client in case
the troubled
nbd-client is trying to reconnect to the original server but requests
are going to
another server?

If yes to above query then what is the best effort we can do to avoid
such scenarios?

>
> --
> Alex Bligh
>
>
>
>



-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Nbd] [PATCH 2/2] nbd: Disallow ioctls on disconnected block device
  2016-07-16 10:08                       ` Pranay Srivastava
@ 2016-07-16 11:26                         ` Wouter Verhelst
  2016-07-16 13:31                           ` Pranay Srivastava
  0 siblings, 1 reply; 48+ messages in thread
From: Wouter Verhelst @ 2016-07-16 11:26 UTC (permalink / raw)
  To: Pranay Srivastava; +Cc: Alex Bligh, nbd-general, linux-kernel

On Sat, Jul 16, 2016 at 03:38:40PM +0530, Pranay Srivastava wrote:
> Okay. So how about we include some negotiated key which goes in with every
> request which the server could maintain for clients that can be checked while
> resetting the connection with the same server?

Wut?

> So am I correct that this situation can
> indeed happen or the server will throw an error back to client in case
> the troubled
> nbd-client is trying to reconnect to the original server but requests
> are going to
> another server?
> 
> If yes to above query then what is the best effort we can do to avoid
> such scenarios?

Tell userspace not to do stupid things?

This isn't a problem. The kernel assumes that whatever userspace does,
once the connection is set up again everything's the way it was before.
If that's not true, then userspace is to blame, not kernel space.

Adding a "key" which we need to pass is going to make things wildly more
complicated for no benefit.

-- 
< ron> I mean, the main *practical* problem with C++, is there's like a dozen
       people in the world who think they really understand all of its rules,
       and pretty much all of them are just lying to themselves too.
 -- #debian-devel, OFTC, 2016-02-12

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Nbd] [PATCH 2/2] nbd: Disallow ioctls on disconnected block device
  2016-07-16 11:26                         ` Wouter Verhelst
@ 2016-07-16 13:31                           ` Pranay Srivastava
  0 siblings, 0 replies; 48+ messages in thread
From: Pranay Srivastava @ 2016-07-16 13:31 UTC (permalink / raw)
  To: Wouter Verhelst; +Cc: Alex Bligh, nbd-general, linux-kernel

On Sat, Jul 16, 2016 at 4:56 PM, Wouter Verhelst <w@uter.be> wrote:
> On Sat, Jul 16, 2016 at 03:38:40PM +0530, Pranay Srivastava wrote:
>> Okay. So how about we include some negotiated key which goes in with every
>> request which the server could maintain for clients that can be checked while
>> resetting the connection with the same server?
>
> Wut?
>
>> So am I correct that this situation can
>> indeed happen or the server will throw an error back to client in case
>> the troubled
>> nbd-client is trying to reconnect to the original server but requests
>> are going to
>> another server?
>>
>> If yes to above query then what is the best effort we can do to avoid
>> such scenarios?
>
> Tell userspace not to do stupid things?
>
> This isn't a problem. The kernel assumes that whatever userspace does,
> once the connection is set up again everything's the way it was before.
> If that's not true, then userspace is to blame, not kernel space.
>
> Adding a "key" which we need to pass is going to make things wildly more
> complicated for no benefit.
Okay.
So let things roll for timeout but stop for disconnect.
>
> --
> < ron> I mean, the main *practical* problem with C++, is there's like a dozen
>        people in the world who think they really understand all of its rules,
>        and pretty much all of them are just lying to themselves too.
>  -- #debian-devel, OFTC, 2016-02-12



-- 
        ---P.K.S

^ permalink raw reply	[flat|nested] 48+ messages in thread

end of thread, other threads:[~2016-07-16 13:31 UTC | newest]

Thread overview: 48+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-24 11:26 [PATCH 0/4]nbd: fixes for nbd Pranay Kr. Srivastava
2016-05-24 11:26 ` [PATCH 1/4] fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
2016-05-30 12:06   ` Markus Pargmann
2016-05-24 11:26 ` [PATCH 2/4] fix various coding standard warnings Pranay Kr. Srivastava
2016-05-24 11:26 ` [PATCH 3/4] make nbd device wait for its users Pranay Kr. Srivastava
2016-05-30 10:44   ` Markus Pargmann
2016-05-24 11:26 ` [PATCH 4/4] use device_attr macros for sysfs attribute Pranay Kr. Srivastava
2016-05-30  3:35 ` [PATCH 0/4]nbd: fixes for nbd Pranay Srivastava
2016-05-30 12:27 ` Markus Pargmann
2016-06-02 10:24   ` [PATCH v2 0/5] nbd: " Pranay Kr. Srivastava
2016-06-02 10:24     ` [PATCH v2 1/5] nbd: fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
2016-06-09 10:03       ` Pranay Srivastava
2016-06-14  5:13         ` Pranay Srivastava
2016-06-14  8:52       ` Markus Pargmann
2016-06-14  9:50         ` Pranay Srivastava
2016-06-24 10:09         ` [PATCH v3 0/3] nbd: resolve bugs and limitations Pranay Kr. Srivastava
2016-06-24 10:09           ` [PATCH v3 1/3]nbd: fix might_sleep warning on socket shutdown Pranay Kr. Srivastava
2016-06-28  5:42             ` Pranay Srivastava
2016-06-29  7:18             ` Markus Pargmann
2016-06-24 10:09           ` [PATCH v3 2/3]nbd: cleanup nbd_set_socket Pranay Kr. Srivastava
2016-06-24 10:09           ` [PATCH 3/3]nbd: make nbd device wait for its users Pranay Kr. Srivastava
2016-06-24 13:42             ` [Nbd] " Eric Blake
2016-06-25 17:56               ` Pranay Srivastava
2016-06-25 18:01             ` Pranay Srivastava
2016-06-29  7:06             ` Markus Pargmann
2016-06-29  7:15               ` Pranay Srivastava
2016-06-02 10:24     ` [PATCH v2 2/5]nbd: cleanup nbd_set_socket Pranay Kr. Srivastava
2016-06-02 10:24     ` [PATCH v2 3/5]nbd: fix various coding standard warnings Pranay Kr. Srivastava
2016-06-02 10:25     ` [PATCH v2 4/5]nbd: make nbd device wait for its users Pranay Kr. Srivastava
2016-06-14  8:59       ` Markus Pargmann
2016-06-14  9:33         ` Pranay Srivastava
2016-06-15  6:30           ` Markus Pargmann
2016-06-15  7:00             ` [Nbd] " Wouter Verhelst
2016-06-15  9:18               ` Pranay Srivastava
2016-06-15  9:17             ` Pranay Srivastava
2016-06-24  9:29               ` [PATCH 1/2] nbd: " Markus Pargmann
2016-06-24  9:29                 ` [PATCH 2/2] nbd: Disallow ioctls on disconnected block device Markus Pargmann
2016-07-16  7:42                   ` Pranay Srivastava
2016-07-16  9:32                     ` [Nbd] " Alex Bligh
2016-07-16 10:08                       ` Pranay Srivastava
2016-07-16 11:26                         ` Wouter Verhelst
2016-07-16 13:31                           ` Pranay Srivastava
2016-06-24  9:39                 ` [PATCH 1/2] nbd: make nbd device wait for its users Pranay Srivastava
2016-06-24 13:40                 ` [Nbd] " Eric Blake
2016-06-25 17:52                 ` Pranay Srivastava
2016-06-29  6:57                   ` Markus Pargmann
2016-06-02 10:25     ` [PATCH v2 5/5]nbd: use device_attr macros for sysfs attribute Pranay Kr. Srivastava
     [not found]     ` <CA+aCy1E0S4ofa04xcO9qxQmuipaF5wdnrv3ubSvETn-rBYYisA@mail.gmail.com>
2016-06-06 11:07       ` [PATCH 0/4]nbd: fixes for nbd Pranay Srivastava

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).