[PATCH v4 0/2] nbd: fix possible page fault for nbd disk

linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v4 0/2] nbd: fix possible page fault for nbd disk
@ 2019-09-17 11:56 xiubli
  2019-09-17 11:56 ` [PATCH v4 1/2] nbd: rename the runtime flags as NBD_RT_ prefixed xiubli
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: xiubli @ 2019-09-17 11:56 UTC (permalink / raw)
  To: josef, axboe; +Cc: mchristi, linux-block, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

V3:
- fix the case that when the NBD_CFLAG_DESTROY_ON_DISCONNECT bit is not set.
- add "nbd: rename the runtime flags as NBD_RT_ prefixed"

V4:
- Address the use after free bug from Mike's comments
- This has been test for 3 days, works well.


Xiubo Li (2):
  nbd: rename the runtime flags as NBD_RT_ prefixed
  nbd: fix possible page fault for nbd disk

 drivers/block/nbd.c | 108 +++++++++++++++++++++++++++++---------------
 1 file changed, 72 insertions(+), 36 deletions(-)

-- 
2.21.0


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v4 1/2] nbd: rename the runtime flags as NBD_RT_ prefixed
  2019-09-17 11:56 [PATCH v4 0/2] nbd: fix possible page fault for nbd disk xiubli
@ 2019-09-17 11:56 ` xiubli
  2019-09-17 11:56 ` [PATCH v4 2/2] nbd: fix possible page fault for nbd disk xiubli
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: xiubli @ 2019-09-17 11:56 UTC (permalink / raw)
  To: josef, axboe; +Cc: mchristi, linux-block, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

Preparing for the destory when disconnecting crash fixing.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 drivers/block/nbd.c | 74 ++++++++++++++++++++++-----------------------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index a8e3815295fe..7e0501c47153 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -71,14 +71,14 @@ struct link_dead_args {
 	int index;
 };
 
-#define NBD_TIMEDOUT			0
-#define NBD_DISCONNECT_REQUESTED	1
-#define NBD_DISCONNECTED		2
-#define NBD_HAS_PID_FILE		3
-#define NBD_HAS_CONFIG_REF		4
-#define NBD_BOUND			5
-#define NBD_DESTROY_ON_DISCONNECT	6
-#define NBD_DISCONNECT_ON_CLOSE 	7
+#define NBD_RT_TIMEDOUT			0
+#define NBD_RT_DISCONNECT_REQUESTED	1
+#define NBD_RT_DISCONNECTED		2
+#define NBD_RT_HAS_PID_FILE		3
+#define NBD_RT_HAS_CONFIG_REF		4
+#define NBD_RT_BOUND			5
+#define NBD_RT_DESTROY_ON_DISCONNECT	6
+#define NBD_RT_DISCONNECT_ON_CLOSE	7
 
 struct nbd_config {
 	u32 flags;
@@ -238,8 +238,8 @@ static void nbd_put(struct nbd_device *nbd)
 
 static int nbd_disconnected(struct nbd_config *config)
 {
-	return test_bit(NBD_DISCONNECTED, &config->runtime_flags) ||
-		test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+	return test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags) ||
+		test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
 }
 
 static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
@@ -257,9 +257,9 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
 	if (!nsock->dead) {
 		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
 		if (atomic_dec_return(&nbd->config->live_connections) == 0) {
-			if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED,
+			if (test_and_clear_bit(NBD_RT_DISCONNECT_REQUESTED,
 					       &nbd->config->runtime_flags)) {
-				set_bit(NBD_DISCONNECTED,
+				set_bit(NBD_RT_DISCONNECTED,
 					&nbd->config->runtime_flags);
 				dev_info(nbd_to_dev(nbd),
 					"Disconnected due to user request.\n");
@@ -333,7 +333,7 @@ static void sock_shutdown(struct nbd_device *nbd)
 
 	if (config->num_connections == 0)
 		return;
-	if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags))
+	if (test_and_set_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
 		return;
 
 	for (i = 0; i < config->num_connections; i++) {
@@ -427,7 +427,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 	}
 
 	dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
-	set_bit(NBD_TIMEDOUT, &config->runtime_flags);
+	set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
 	cmd->status = BLK_STS_IOERR;
 	mutex_unlock(&cmd->lock);
 	sock_shutdown(nbd);
@@ -795,7 +795,7 @@ static int find_fallback(struct nbd_device *nbd, int index)
 	struct nbd_sock *nsock = config->socks[index];
 	int fallback = nsock->fallback_index;
 
-	if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+	if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
 		return new_index;
 
 	if (config->num_connections <= 1) {
@@ -836,7 +836,7 @@ static int wait_for_reconnect(struct nbd_device *nbd)
 	struct nbd_config *config = nbd->config;
 	if (!config->dead_conn_timeout)
 		return 0;
-	if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+	if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
 		return 0;
 	return wait_event_timeout(config->conn_wait,
 				  atomic_read(&config->live_connections) > 0,
@@ -969,12 +969,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
 		return err;
 
 	if (!netlink && !nbd->task_setup &&
-	    !test_bit(NBD_BOUND, &config->runtime_flags))
+	    !test_bit(NBD_RT_BOUND, &config->runtime_flags))
 		nbd->task_setup = current;
 
 	if (!netlink &&
 	    (nbd->task_setup != current ||
-	     test_bit(NBD_BOUND, &config->runtime_flags))) {
+	     test_bit(NBD_RT_BOUND, &config->runtime_flags))) {
 		dev_err(disk_to_dev(nbd->disk),
 			"Device being setup by another task");
 		sockfd_put(sock);
@@ -1053,7 +1053,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
 		mutex_unlock(&nsock->tx_lock);
 		sockfd_put(old);
 
-		clear_bit(NBD_DISCONNECTED, &config->runtime_flags);
+		clear_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
 
 		/* We take the tx_mutex in an error path in the recv_work, so we
 		 * need to queue_work outside of the tx_mutex.
@@ -1124,7 +1124,7 @@ static int nbd_disconnect(struct nbd_device *nbd)
 	struct nbd_config *config = nbd->config;
 
 	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
-	set_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+	set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
 	send_disconnects(nbd);
 	return 0;
 }
@@ -1143,7 +1143,7 @@ static void nbd_config_put(struct nbd_device *nbd)
 		struct nbd_config *config = nbd->config;
 		nbd_dev_dbg_close(nbd);
 		nbd_size_clear(nbd);
-		if (test_and_clear_bit(NBD_HAS_PID_FILE,
+		if (test_and_clear_bit(NBD_RT_HAS_PID_FILE,
 				       &config->runtime_flags))
 			device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
 		nbd->task_recv = NULL;
@@ -1209,7 +1209,7 @@ static int nbd_start_device(struct nbd_device *nbd)
 		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
 		return error;
 	}
-	set_bit(NBD_HAS_PID_FILE, &config->runtime_flags);
+	set_bit(NBD_RT_HAS_PID_FILE, &config->runtime_flags);
 
 	nbd_dev_dbg_init(nbd);
 	for (i = 0; i < num_connections; i++) {
@@ -1256,9 +1256,9 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
 	mutex_lock(&nbd->config_lock);
 	nbd_bdev_reset(bdev);
 	/* user requested, ignore socket errors */
-	if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
+	if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
 		ret = 0;
-	if (test_bit(NBD_TIMEDOUT, &config->runtime_flags))
+	if (test_bit(NBD_RT_TIMEDOUT, &config->runtime_flags))
 		ret = -ETIMEDOUT;
 	return ret;
 }
@@ -1269,7 +1269,7 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
 	sock_shutdown(nbd);
 	__invalidate_device(bdev, true);
 	nbd_bdev_reset(bdev);
-	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+	if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
 			       &nbd->config->runtime_flags))
 		nbd_config_put(nbd);
 }
@@ -1364,7 +1364,7 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
 	/* Don't allow ioctl operations on a nbd device that was created with
 	 * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine.
 	 */
-	if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+	if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
 	    (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
 		error = __nbd_ioctl(bdev, nbd, cmd, arg);
 	else
@@ -1435,7 +1435,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode)
 	struct nbd_device *nbd = disk->private_data;
 	struct block_device *bdev = bdget_disk(disk, 0);
 
-	if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
+	if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
 			bdev->bd_openers == 0)
 		nbd_disconnect_and_put(nbd);
 
@@ -1833,7 +1833,7 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 		return -ENOMEM;
 	}
 	refcount_set(&nbd->config_refs, 1);
-	set_bit(NBD_BOUND, &config->runtime_flags);
+	set_bit(NBD_RT_BOUND, &config->runtime_flags);
 
 	ret = nbd_genl_size_set(info, nbd);
 	if (ret)
@@ -1853,12 +1853,12 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
 		u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
 		if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
-			set_bit(NBD_DESTROY_ON_DISCONNECT,
+			set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
 				&config->runtime_flags);
 			put_dev = true;
 		}
 		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
-			set_bit(NBD_DISCONNECT_ON_CLOSE,
+			set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
 				&config->runtime_flags);
 		}
 	}
@@ -1897,7 +1897,7 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 out:
 	mutex_unlock(&nbd->config_lock);
 	if (!ret) {
-		set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags);
+		set_bit(NBD_RT_HAS_CONFIG_REF, &config->runtime_flags);
 		refcount_inc(&nbd->config_refs);
 		nbd_connect_reply(info, nbd->index);
 	}
@@ -1919,7 +1919,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
 	 * queue.
 	 */
 	flush_workqueue(nbd->recv_workq);
-	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+	if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
 			       &nbd->config->runtime_flags))
 		nbd_config_put(nbd);
 }
@@ -2003,7 +2003,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
 
 	mutex_lock(&nbd->config_lock);
 	config = nbd->config;
-	if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+	if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
 	    !nbd->task_recv) {
 		dev_err(nbd_to_dev(nbd),
 			"not configured, cannot reconfigure\n");
@@ -2026,20 +2026,20 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
 		u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
 		if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
-			if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
+			if (!test_and_set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
 					      &config->runtime_flags))
 				put_dev = true;
 		} else {
-			if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
+			if (test_and_clear_bit(NBD_RT_DESTROY_ON_DISCONNECT,
 					       &config->runtime_flags))
 				refcount_inc(&nbd->refs);
 		}
 
 		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
-			set_bit(NBD_DISCONNECT_ON_CLOSE,
+			set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
 					&config->runtime_flags);
 		} else {
-			clear_bit(NBD_DISCONNECT_ON_CLOSE,
+			clear_bit(NBD_RT_DISCONNECT_ON_CLOSE,
 					&config->runtime_flags);
 		}
 	}
-- 
2.21.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v4 2/2] nbd: fix possible page fault for nbd disk
  2019-09-17 11:56 [PATCH v4 0/2] nbd: fix possible page fault for nbd disk xiubli
  2019-09-17 11:56 ` [PATCH v4 1/2] nbd: rename the runtime flags as NBD_RT_ prefixed xiubli
@ 2019-09-17 11:56 ` xiubli
  2019-09-17 18:31   ` Mike Christie
  2019-09-17 18:15 ` [PATCH v4 0/2] " Josef Bacik
  2019-09-17 20:52 ` Jens Axboe
  3 siblings, 1 reply; 10+ messages in thread
From: xiubli @ 2019-09-17 11:56 UTC (permalink / raw)
  To: josef, axboe; +Cc: mchristi, linux-block, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

When the NBD_CFLAG_DESTROY_ON_DISCONNECT flag is set and at the same
time when the socket is closed due to the server daemon is restarted,
just before the last DISCONNET is totally done if we start a new connection
by using the old nbd_index, there will be crashing randomly, like:

<3>[  110.151949] block nbd1: Receive control failed (result -32)
<1>[  110.152024] BUG: unable to handle page fault for address: 0000058000000840
<1>[  110.152063] #PF: supervisor read access in kernel mode
<1>[  110.152083] #PF: error_code(0x0000) - not-present page
<6>[  110.152094] PGD 0 P4D 0
<4>[  110.152106] Oops: 0000 [#1] SMP PTI
<4>[  110.152120] CPU: 0 PID: 6698 Comm: kworker/u5:1 Kdump: loaded Not tainted 5.3.0-rc4+ #2
<4>[  110.152136] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
<4>[  110.152166] Workqueue: knbd-recv recv_work [nbd]
<4>[  110.152187] RIP: 0010:__dev_printk+0xd/0x67
<4>[  110.152206] Code: 10 e8 c5 fd ff ff 48 8b 4c 24 18 65 48 33 0c 25 28 00 [...]
<4>[  110.152244] RSP: 0018:ffffa41581f13d18 EFLAGS: 00010206
<4>[  110.152256] RAX: ffffa41581f13d30 RBX: ffff96dd7374e900 RCX: 0000000000000000
<4>[  110.152271] RDX: ffffa41581f13d20 RSI: 00000580000007f0 RDI: ffffffff970ec24f
<4>[  110.152285] RBP: ffffa41581f13d80 R08: ffff96dd7fc17908 R09: 0000000000002e56
<4>[  110.152299] R10: ffffffff970ec24f R11: 0000000000000003 R12: ffff96dd7374e900
<4>[  110.152313] R13: 0000000000000000 R14: ffff96dd7374e9d8 R15: ffff96dd6e3b02c8
<4>[  110.152329] FS:  0000000000000000(0000) GS:ffff96dd7fc00000(0000) knlGS:0000000000000000
<4>[  110.152362] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4>[  110.152383] CR2: 0000058000000840 CR3: 0000000067cc6002 CR4: 00000000001606f0
<4>[  110.152401] Call Trace:
<4>[  110.152422]  _dev_err+0x6c/0x83
<4>[  110.152435]  nbd_read_stat.cold+0xda/0x578 [nbd]
<4>[  110.152448]  ? __switch_to_asm+0x34/0x70
<4>[  110.152468]  ? __switch_to_asm+0x40/0x70
<4>[  110.152478]  ? __switch_to_asm+0x34/0x70
<4>[  110.152491]  ? __switch_to_asm+0x40/0x70
<4>[  110.152501]  ? __switch_to_asm+0x34/0x70
<4>[  110.152511]  ? __switch_to_asm+0x40/0x70
<4>[  110.152522]  ? __switch_to_asm+0x34/0x70
<4>[  110.152533]  recv_work+0x35/0x9e [nbd]
<4>[  110.152547]  process_one_work+0x19d/0x340
<4>[  110.152558]  worker_thread+0x50/0x3b0
<4>[  110.152568]  kthread+0xfb/0x130
<4>[  110.152577]  ? process_one_work+0x340/0x340
<4>[  110.152609]  ? kthread_park+0x80/0x80
<4>[  110.152637]  ret_from_fork+0x35/0x40

This is very easy to reproduce by running the nbd-runner.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 drivers/block/nbd.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7e0501c47153..ac07e8c94c79 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -26,6 +26,7 @@
 #include <linux/ioctl.h>
 #include <linux/mutex.h>
 #include <linux/compiler.h>
+#include <linux/completion.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -80,6 +81,9 @@ struct link_dead_args {
 #define NBD_RT_DESTROY_ON_DISCONNECT	6
 #define NBD_RT_DISCONNECT_ON_CLOSE	7
 
+#define NBD_DESTROY_ON_DISCONNECT	0
+#define NBD_DISCONNECT_REQUESTED	1
+
 struct nbd_config {
 	u32 flags;
 	unsigned long runtime_flags;
@@ -113,6 +117,9 @@ struct nbd_device {
 	struct list_head list;
 	struct task_struct *task_recv;
 	struct task_struct *task_setup;
+
+	struct completion *destroy_complete;
+	unsigned long flags;
 };
 
 #define NBD_CMD_REQUEUED	1
@@ -223,6 +230,16 @@ static void nbd_dev_remove(struct nbd_device *nbd)
 		disk->private_data = NULL;
 		put_disk(disk);
 	}
+
+	/*
+	 * Place this in the last just before the nbd is freed to
+	 * make sure that the disk and the related kobject are also
+	 * totally removed to avoid duplicate creation of the same
+	 * one.
+	 */
+	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
+		complete(nbd->destroy_complete);
+
 	kfree(nbd);
 }
 
@@ -1125,6 +1142,7 @@ static int nbd_disconnect(struct nbd_device *nbd)
 
 	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
 	set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
+	set_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags);
 	send_disconnects(nbd);
 	return 0;
 }
@@ -1636,6 +1654,7 @@ static int nbd_dev_add(int index)
 	nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
 		BLK_MQ_F_BLOCKING;
 	nbd->tag_set.driver_data = nbd;
+	nbd->destroy_complete = NULL;
 
 	err = blk_mq_alloc_tag_set(&nbd->tag_set);
 	if (err)
@@ -1750,6 +1769,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
 
 static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 {
+	DECLARE_COMPLETION_ONSTACK(destroy_complete);
 	struct nbd_device *nbd = NULL;
 	struct nbd_config *config;
 	int index = -1;
@@ -1801,6 +1821,17 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 		mutex_unlock(&nbd_index_mutex);
 		return -EINVAL;
 	}
+
+	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
+	    test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
+		nbd->destroy_complete = &destroy_complete;
+		mutex_unlock(&nbd_index_mutex);
+
+		/* Wait untill the the nbd stuff is totally destroyed */
+		wait_for_completion(&destroy_complete);
+		goto again;
+	}
+
 	if (!refcount_inc_not_zero(&nbd->refs)) {
 		mutex_unlock(&nbd_index_mutex);
 		if (index == -1)
@@ -1855,7 +1886,10 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 		if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
 			set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
 				&config->runtime_flags);
+			set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
 			put_dev = true;
+		} else {
+			clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
 		}
 		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
 			set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
@@ -2029,10 +2063,12 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
 			if (!test_and_set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
 					      &config->runtime_flags))
 				put_dev = true;
+			set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
 		} else {
 			if (test_and_clear_bit(NBD_RT_DESTROY_ON_DISCONNECT,
 					       &config->runtime_flags))
 				refcount_inc(&nbd->refs);
+			clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
 		}
 
 		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
-- 
2.21.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v4 0/2] nbd: fix possible page fault for nbd disk
  2019-09-17 11:56 [PATCH v4 0/2] nbd: fix possible page fault for nbd disk xiubli
  2019-09-17 11:56 ` [PATCH v4 1/2] nbd: rename the runtime flags as NBD_RT_ prefixed xiubli
  2019-09-17 11:56 ` [PATCH v4 2/2] nbd: fix possible page fault for nbd disk xiubli
@ 2019-09-17 18:15 ` Josef Bacik
  2019-09-17 20:52 ` Jens Axboe
  3 siblings, 0 replies; 10+ messages in thread
From: Josef Bacik @ 2019-09-17 18:15 UTC (permalink / raw)
  To: xiubli; +Cc: josef, axboe, mchristi, linux-block

On Tue, Sep 17, 2019 at 05:26:04PM +0530, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> V3:
> - fix the case that when the NBD_CFLAG_DESTROY_ON_DISCONNECT bit is not set.
> - add "nbd: rename the runtime flags as NBD_RT_ prefixed"
> 
> V4:
> - Address the use after free bug from Mike's comments
> - This has been test for 3 days, works well.
> 
> 

You can add

Reviewed-by: Josef Bacik <josef@toxicpanda.com>

to the series, Thanks,

Josef

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4 2/2] nbd: fix possible page fault for nbd disk
  2019-09-17 11:56 ` [PATCH v4 2/2] nbd: fix possible page fault for nbd disk xiubli
@ 2019-09-17 18:31   ` Mike Christie
  2019-09-17 18:40     ` Josef Bacik
  0 siblings, 1 reply; 10+ messages in thread
From: Mike Christie @ 2019-09-17 18:31 UTC (permalink / raw)
  To: xiubli, josef, axboe; +Cc: linux-block

On 09/17/2019 06:56 AM, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> When the NBD_CFLAG_DESTROY_ON_DISCONNECT flag is set and at the same
> time when the socket is closed due to the server daemon is restarted,
> just before the last DISCONNET is totally done if we start a new connection
> by using the old nbd_index, there will be crashing randomly, like:
> 
> <3>[  110.151949] block nbd1: Receive control failed (result -32)
> <1>[  110.152024] BUG: unable to handle page fault for address: 0000058000000840
> <1>[  110.152063] #PF: supervisor read access in kernel mode
> <1>[  110.152083] #PF: error_code(0x0000) - not-present page
> <6>[  110.152094] PGD 0 P4D 0
> <4>[  110.152106] Oops: 0000 [#1] SMP PTI
> <4>[  110.152120] CPU: 0 PID: 6698 Comm: kworker/u5:1 Kdump: loaded Not tainted 5.3.0-rc4+ #2
> <4>[  110.152136] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
> <4>[  110.152166] Workqueue: knbd-recv recv_work [nbd]
> <4>[  110.152187] RIP: 0010:__dev_printk+0xd/0x67
> <4>[  110.152206] Code: 10 e8 c5 fd ff ff 48 8b 4c 24 18 65 48 33 0c 25 28 00 [...]
> <4>[  110.152244] RSP: 0018:ffffa41581f13d18 EFLAGS: 00010206
> <4>[  110.152256] RAX: ffffa41581f13d30 RBX: ffff96dd7374e900 RCX: 0000000000000000
> <4>[  110.152271] RDX: ffffa41581f13d20 RSI: 00000580000007f0 RDI: ffffffff970ec24f
> <4>[  110.152285] RBP: ffffa41581f13d80 R08: ffff96dd7fc17908 R09: 0000000000002e56
> <4>[  110.152299] R10: ffffffff970ec24f R11: 0000000000000003 R12: ffff96dd7374e900
> <4>[  110.152313] R13: 0000000000000000 R14: ffff96dd7374e9d8 R15: ffff96dd6e3b02c8
> <4>[  110.152329] FS:  0000000000000000(0000) GS:ffff96dd7fc00000(0000) knlGS:0000000000000000
> <4>[  110.152362] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> <4>[  110.152383] CR2: 0000058000000840 CR3: 0000000067cc6002 CR4: 00000000001606f0
> <4>[  110.152401] Call Trace:
> <4>[  110.152422]  _dev_err+0x6c/0x83
> <4>[  110.152435]  nbd_read_stat.cold+0xda/0x578 [nbd]
> <4>[  110.152448]  ? __switch_to_asm+0x34/0x70
> <4>[  110.152468]  ? __switch_to_asm+0x40/0x70
> <4>[  110.152478]  ? __switch_to_asm+0x34/0x70
> <4>[  110.152491]  ? __switch_to_asm+0x40/0x70
> <4>[  110.152501]  ? __switch_to_asm+0x34/0x70
> <4>[  110.152511]  ? __switch_to_asm+0x40/0x70
> <4>[  110.152522]  ? __switch_to_asm+0x34/0x70
> <4>[  110.152533]  recv_work+0x35/0x9e [nbd]
> <4>[  110.152547]  process_one_work+0x19d/0x340
> <4>[  110.152558]  worker_thread+0x50/0x3b0
> <4>[  110.152568]  kthread+0xfb/0x130
> <4>[  110.152577]  ? process_one_work+0x340/0x340
> <4>[  110.152609]  ? kthread_park+0x80/0x80
> <4>[  110.152637]  ret_from_fork+0x35/0x40
> 
> This is very easy to reproduce by running the nbd-runner.
> 
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  drivers/block/nbd.c | 36 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 36 insertions(+)
> 
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 7e0501c47153..ac07e8c94c79 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -26,6 +26,7 @@
>  #include <linux/ioctl.h>
>  #include <linux/mutex.h>
>  #include <linux/compiler.h>
> +#include <linux/completion.h>
>  #include <linux/err.h>
>  #include <linux/kernel.h>
>  #include <linux/slab.h>
> @@ -80,6 +81,9 @@ struct link_dead_args {
>  #define NBD_RT_DESTROY_ON_DISCONNECT	6
>  #define NBD_RT_DISCONNECT_ON_CLOSE	7
>  
> +#define NBD_DESTROY_ON_DISCONNECT	0
> +#define NBD_DISCONNECT_REQUESTED	1
> +
>  struct nbd_config {
>  	u32 flags;
>  	unsigned long runtime_flags;
> @@ -113,6 +117,9 @@ struct nbd_device {
>  	struct list_head list;
>  	struct task_struct *task_recv;
>  	struct task_struct *task_setup;
> +
> +	struct completion *destroy_complete;
> +	unsigned long flags;
>  };
>  
>  #define NBD_CMD_REQUEUED	1
> @@ -223,6 +230,16 @@ static void nbd_dev_remove(struct nbd_device *nbd)
>  		disk->private_data = NULL;
>  		put_disk(disk);
>  	}
> +
> +	/*
> +	 * Place this in the last just before the nbd is freed to
> +	 * make sure that the disk and the related kobject are also
> +	 * totally removed to avoid duplicate creation of the same
> +	 * one.
> +	 */
> +	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
> +		complete(nbd->destroy_complete);
> +
>  	kfree(nbd);
>  }
>  
> @@ -1125,6 +1142,7 @@ static int nbd_disconnect(struct nbd_device *nbd)
>  
>  	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
>  	set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
> +	set_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags);
>  	send_disconnects(nbd);
>  	return 0;
>  }
> @@ -1636,6 +1654,7 @@ static int nbd_dev_add(int index)
>  	nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
>  		BLK_MQ_F_BLOCKING;
>  	nbd->tag_set.driver_data = nbd;
> +	nbd->destroy_complete = NULL;
>  
>  	err = blk_mq_alloc_tag_set(&nbd->tag_set);
>  	if (err)
> @@ -1750,6 +1769,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
>  
>  static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
>  {
> +	DECLARE_COMPLETION_ONSTACK(destroy_complete);
>  	struct nbd_device *nbd = NULL;
>  	struct nbd_config *config;
>  	int index = -1;
> @@ -1801,6 +1821,17 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
>  		mutex_unlock(&nbd_index_mutex);
>  		return -EINVAL;
>  	}
> +
> +	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
> +	    test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {

You still need the nbd_put mutex part of the v3 patch don't you?

nbd_dev_remove could call kfree(nbd) while we are accessing the nbd
device struct above.

> +		nbd->destroy_complete = &destroy_complete;

Also, without the mutex part of the v3 patch, we could race and
nbd_dev_remove could have passed the destroy_complete check already, so
below we will wait forever.


> +		mutex_unlock(&nbd_index_mutex);
> +
> +		/* Wait untill the the nbd stuff is totally destroyed */
> +		wait_for_completion(&destroy_complete);
> +		goto again;
> +	}
> +



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4 2/2] nbd: fix possible page fault for nbd disk
  2019-09-17 18:31   ` Mike Christie
@ 2019-09-17 18:40     ` Josef Bacik
  2019-09-17 19:36       ` Mike Christie
  2019-09-17 19:44       ` Mike Christie
  0 siblings, 2 replies; 10+ messages in thread
From: Josef Bacik @ 2019-09-17 18:40 UTC (permalink / raw)
  To: Mike Christie; +Cc: xiubli, josef, axboe, linux-block

On Tue, Sep 17, 2019 at 01:31:05PM -0500, Mike Christie wrote:
> On 09/17/2019 06:56 AM, xiubli@redhat.com wrote:
> > From: Xiubo Li <xiubli@redhat.com>
> > 
> > When the NBD_CFLAG_DESTROY_ON_DISCONNECT flag is set and at the same
> > time when the socket is closed due to the server daemon is restarted,
> > just before the last DISCONNET is totally done if we start a new connection
> > by using the old nbd_index, there will be crashing randomly, like:
> > 
> > <3>[  110.151949] block nbd1: Receive control failed (result -32)
> > <1>[  110.152024] BUG: unable to handle page fault for address: 0000058000000840
> > <1>[  110.152063] #PF: supervisor read access in kernel mode
> > <1>[  110.152083] #PF: error_code(0x0000) - not-present page
> > <6>[  110.152094] PGD 0 P4D 0
> > <4>[  110.152106] Oops: 0000 [#1] SMP PTI
> > <4>[  110.152120] CPU: 0 PID: 6698 Comm: kworker/u5:1 Kdump: loaded Not tainted 5.3.0-rc4+ #2
> > <4>[  110.152136] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
> > <4>[  110.152166] Workqueue: knbd-recv recv_work [nbd]
> > <4>[  110.152187] RIP: 0010:__dev_printk+0xd/0x67
> > <4>[  110.152206] Code: 10 e8 c5 fd ff ff 48 8b 4c 24 18 65 48 33 0c 25 28 00 [...]
> > <4>[  110.152244] RSP: 0018:ffffa41581f13d18 EFLAGS: 00010206
> > <4>[  110.152256] RAX: ffffa41581f13d30 RBX: ffff96dd7374e900 RCX: 0000000000000000
> > <4>[  110.152271] RDX: ffffa41581f13d20 RSI: 00000580000007f0 RDI: ffffffff970ec24f
> > <4>[  110.152285] RBP: ffffa41581f13d80 R08: ffff96dd7fc17908 R09: 0000000000002e56
> > <4>[  110.152299] R10: ffffffff970ec24f R11: 0000000000000003 R12: ffff96dd7374e900
> > <4>[  110.152313] R13: 0000000000000000 R14: ffff96dd7374e9d8 R15: ffff96dd6e3b02c8
> > <4>[  110.152329] FS:  0000000000000000(0000) GS:ffff96dd7fc00000(0000) knlGS:0000000000000000
> > <4>[  110.152362] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > <4>[  110.152383] CR2: 0000058000000840 CR3: 0000000067cc6002 CR4: 00000000001606f0
> > <4>[  110.152401] Call Trace:
> > <4>[  110.152422]  _dev_err+0x6c/0x83
> > <4>[  110.152435]  nbd_read_stat.cold+0xda/0x578 [nbd]
> > <4>[  110.152448]  ? __switch_to_asm+0x34/0x70
> > <4>[  110.152468]  ? __switch_to_asm+0x40/0x70
> > <4>[  110.152478]  ? __switch_to_asm+0x34/0x70
> > <4>[  110.152491]  ? __switch_to_asm+0x40/0x70
> > <4>[  110.152501]  ? __switch_to_asm+0x34/0x70
> > <4>[  110.152511]  ? __switch_to_asm+0x40/0x70
> > <4>[  110.152522]  ? __switch_to_asm+0x34/0x70
> > <4>[  110.152533]  recv_work+0x35/0x9e [nbd]
> > <4>[  110.152547]  process_one_work+0x19d/0x340
> > <4>[  110.152558]  worker_thread+0x50/0x3b0
> > <4>[  110.152568]  kthread+0xfb/0x130
> > <4>[  110.152577]  ? process_one_work+0x340/0x340
> > <4>[  110.152609]  ? kthread_park+0x80/0x80
> > <4>[  110.152637]  ret_from_fork+0x35/0x40
> > 
> > This is very easy to reproduce by running the nbd-runner.
> > 
> > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > ---
> >  drivers/block/nbd.c | 36 ++++++++++++++++++++++++++++++++++++
> >  1 file changed, 36 insertions(+)
> > 
> > diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> > index 7e0501c47153..ac07e8c94c79 100644
> > --- a/drivers/block/nbd.c
> > +++ b/drivers/block/nbd.c
> > @@ -26,6 +26,7 @@
> >  #include <linux/ioctl.h>
> >  #include <linux/mutex.h>
> >  #include <linux/compiler.h>
> > +#include <linux/completion.h>
> >  #include <linux/err.h>
> >  #include <linux/kernel.h>
> >  #include <linux/slab.h>
> > @@ -80,6 +81,9 @@ struct link_dead_args {
> >  #define NBD_RT_DESTROY_ON_DISCONNECT	6
> >  #define NBD_RT_DISCONNECT_ON_CLOSE	7
> >  
> > +#define NBD_DESTROY_ON_DISCONNECT	0
> > +#define NBD_DISCONNECT_REQUESTED	1
> > +
> >  struct nbd_config {
> >  	u32 flags;
> >  	unsigned long runtime_flags;
> > @@ -113,6 +117,9 @@ struct nbd_device {
> >  	struct list_head list;
> >  	struct task_struct *task_recv;
> >  	struct task_struct *task_setup;
> > +
> > +	struct completion *destroy_complete;
> > +	unsigned long flags;
> >  };
> >  
> >  #define NBD_CMD_REQUEUED	1
> > @@ -223,6 +230,16 @@ static void nbd_dev_remove(struct nbd_device *nbd)
> >  		disk->private_data = NULL;
> >  		put_disk(disk);
> >  	}
> > +
> > +	/*
> > +	 * Place this in the last just before the nbd is freed to
> > +	 * make sure that the disk and the related kobject are also
> > +	 * totally removed to avoid duplicate creation of the same
> > +	 * one.
> > +	 */
> > +	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
> > +		complete(nbd->destroy_complete);
> > +
> >  	kfree(nbd);
> >  }
> >  
> > @@ -1125,6 +1142,7 @@ static int nbd_disconnect(struct nbd_device *nbd)
> >  
> >  	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
> >  	set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
> > +	set_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags);
> >  	send_disconnects(nbd);
> >  	return 0;
> >  }
> > @@ -1636,6 +1654,7 @@ static int nbd_dev_add(int index)
> >  	nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
> >  		BLK_MQ_F_BLOCKING;
> >  	nbd->tag_set.driver_data = nbd;
> > +	nbd->destroy_complete = NULL;
> >  
> >  	err = blk_mq_alloc_tag_set(&nbd->tag_set);
> >  	if (err)
> > @@ -1750,6 +1769,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
> >  
> >  static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
> >  {
> > +	DECLARE_COMPLETION_ONSTACK(destroy_complete);
> >  	struct nbd_device *nbd = NULL;
> >  	struct nbd_config *config;
> >  	int index = -1;
> > @@ -1801,6 +1821,17 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
> >  		mutex_unlock(&nbd_index_mutex);
> >  		return -EINVAL;
> >  	}
> > +
> > +	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
> > +	    test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
> 
> You still need the nbd_put mutex part of the v3 patch don't you?
> 
> nbd_dev_remove could call kfree(nbd) while we are accessing the nbd
> device struct above.
> 

We're still holding the mutex here, so this is safe right?

> > +		nbd->destroy_complete = &destroy_complete;
> 
> Also, without the mutex part of the v3 patch, we could race and
> nbd_dev_remove could have passed the destroy_complete check already, so
> below we will wait forever.
> 

Oh hmm you're right, we need to re-init the completion every time.  I retract my
reviewed-by I guess.  Thanks,

Josef

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4 2/2] nbd: fix possible page fault for nbd disk
  2019-09-17 18:40     ` Josef Bacik
@ 2019-09-17 19:36       ` Mike Christie
  2019-09-17 19:44       ` Mike Christie
  1 sibling, 0 replies; 10+ messages in thread
From: Mike Christie @ 2019-09-17 19:36 UTC (permalink / raw)
  To: Josef Bacik; +Cc: xiubli, axboe, linux-block

On 09/17/2019 01:40 PM, Josef Bacik wrote:
> On Tue, Sep 17, 2019 at 01:31:05PM -0500, Mike Christie wrote:
>> On 09/17/2019 06:56 AM, xiubli@redhat.com wrote:
>>> From: Xiubo Li <xiubli@redhat.com>
>>>
>>> When the NBD_CFLAG_DESTROY_ON_DISCONNECT flag is set and at the same
>>> time when the socket is closed due to the server daemon is restarted,
>>> just before the last DISCONNET is totally done if we start a new connection
>>> by using the old nbd_index, there will be crashing randomly, like:
>>>
>>> <3>[  110.151949] block nbd1: Receive control failed (result -32)
>>> <1>[  110.152024] BUG: unable to handle page fault for address: 0000058000000840
>>> <1>[  110.152063] #PF: supervisor read access in kernel mode
>>> <1>[  110.152083] #PF: error_code(0x0000) - not-present page
>>> <6>[  110.152094] PGD 0 P4D 0
>>> <4>[  110.152106] Oops: 0000 [#1] SMP PTI
>>> <4>[  110.152120] CPU: 0 PID: 6698 Comm: kworker/u5:1 Kdump: loaded Not tainted 5.3.0-rc4+ #2
>>> <4>[  110.152136] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
>>> <4>[  110.152166] Workqueue: knbd-recv recv_work [nbd]
>>> <4>[  110.152187] RIP: 0010:__dev_printk+0xd/0x67
>>> <4>[  110.152206] Code: 10 e8 c5 fd ff ff 48 8b 4c 24 18 65 48 33 0c 25 28 00 [...]
>>> <4>[  110.152244] RSP: 0018:ffffa41581f13d18 EFLAGS: 00010206
>>> <4>[  110.152256] RAX: ffffa41581f13d30 RBX: ffff96dd7374e900 RCX: 0000000000000000
>>> <4>[  110.152271] RDX: ffffa41581f13d20 RSI: 00000580000007f0 RDI: ffffffff970ec24f
>>> <4>[  110.152285] RBP: ffffa41581f13d80 R08: ffff96dd7fc17908 R09: 0000000000002e56
>>> <4>[  110.152299] R10: ffffffff970ec24f R11: 0000000000000003 R12: ffff96dd7374e900
>>> <4>[  110.152313] R13: 0000000000000000 R14: ffff96dd7374e9d8 R15: ffff96dd6e3b02c8
>>> <4>[  110.152329] FS:  0000000000000000(0000) GS:ffff96dd7fc00000(0000) knlGS:0000000000000000
>>> <4>[  110.152362] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> <4>[  110.152383] CR2: 0000058000000840 CR3: 0000000067cc6002 CR4: 00000000001606f0
>>> <4>[  110.152401] Call Trace:
>>> <4>[  110.152422]  _dev_err+0x6c/0x83
>>> <4>[  110.152435]  nbd_read_stat.cold+0xda/0x578 [nbd]
>>> <4>[  110.152448]  ? __switch_to_asm+0x34/0x70
>>> <4>[  110.152468]  ? __switch_to_asm+0x40/0x70
>>> <4>[  110.152478]  ? __switch_to_asm+0x34/0x70
>>> <4>[  110.152491]  ? __switch_to_asm+0x40/0x70
>>> <4>[  110.152501]  ? __switch_to_asm+0x34/0x70
>>> <4>[  110.152511]  ? __switch_to_asm+0x40/0x70
>>> <4>[  110.152522]  ? __switch_to_asm+0x34/0x70
>>> <4>[  110.152533]  recv_work+0x35/0x9e [nbd]
>>> <4>[  110.152547]  process_one_work+0x19d/0x340
>>> <4>[  110.152558]  worker_thread+0x50/0x3b0
>>> <4>[  110.152568]  kthread+0xfb/0x130
>>> <4>[  110.152577]  ? process_one_work+0x340/0x340
>>> <4>[  110.152609]  ? kthread_park+0x80/0x80
>>> <4>[  110.152637]  ret_from_fork+0x35/0x40
>>>
>>> This is very easy to reproduce by running the nbd-runner.
>>>
>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>> ---
>>>  drivers/block/nbd.c | 36 ++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 36 insertions(+)
>>>
>>> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
>>> index 7e0501c47153..ac07e8c94c79 100644
>>> --- a/drivers/block/nbd.c
>>> +++ b/drivers/block/nbd.c
>>> @@ -26,6 +26,7 @@
>>>  #include <linux/ioctl.h>
>>>  #include <linux/mutex.h>
>>>  #include <linux/compiler.h>
>>> +#include <linux/completion.h>
>>>  #include <linux/err.h>
>>>  #include <linux/kernel.h>
>>>  #include <linux/slab.h>
>>> @@ -80,6 +81,9 @@ struct link_dead_args {
>>>  #define NBD_RT_DESTROY_ON_DISCONNECT	6
>>>  #define NBD_RT_DISCONNECT_ON_CLOSE	7
>>>  
>>> +#define NBD_DESTROY_ON_DISCONNECT	0
>>> +#define NBD_DISCONNECT_REQUESTED	1
>>> +
>>>  struct nbd_config {
>>>  	u32 flags;
>>>  	unsigned long runtime_flags;
>>> @@ -113,6 +117,9 @@ struct nbd_device {
>>>  	struct list_head list;
>>>  	struct task_struct *task_recv;
>>>  	struct task_struct *task_setup;
>>> +
>>> +	struct completion *destroy_complete;
>>> +	unsigned long flags;
>>>  };
>>>  
>>>  #define NBD_CMD_REQUEUED	1
>>> @@ -223,6 +230,16 @@ static void nbd_dev_remove(struct nbd_device *nbd)
>>>  		disk->private_data = NULL;
>>>  		put_disk(disk);
>>>  	}
>>> +
>>> +	/*
>>> +	 * Place this in the last just before the nbd is freed to
>>> +	 * make sure that the disk and the related kobject are also
>>> +	 * totally removed to avoid duplicate creation of the same
>>> +	 * one.
>>> +	 */
>>> +	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
>>> +		complete(nbd->destroy_complete);
>>> +
>>>  	kfree(nbd);
>>>  }
>>>  
>>> @@ -1125,6 +1142,7 @@ static int nbd_disconnect(struct nbd_device *nbd)
>>>  
>>>  	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
>>>  	set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
>>> +	set_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags);
>>>  	send_disconnects(nbd);
>>>  	return 0;
>>>  }
>>> @@ -1636,6 +1654,7 @@ static int nbd_dev_add(int index)
>>>  	nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
>>>  		BLK_MQ_F_BLOCKING;
>>>  	nbd->tag_set.driver_data = nbd;
>>> +	nbd->destroy_complete = NULL;
>>>  
>>>  	err = blk_mq_alloc_tag_set(&nbd->tag_set);
>>>  	if (err)
>>> @@ -1750,6 +1769,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
>>>  
>>>  static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
>>>  {
>>> +	DECLARE_COMPLETION_ONSTACK(destroy_complete);
>>>  	struct nbd_device *nbd = NULL;
>>>  	struct nbd_config *config;
>>>  	int index = -1;
>>> @@ -1801,6 +1821,17 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
>>>  		mutex_unlock(&nbd_index_mutex);
>>>  		return -EINVAL;
>>>  	}
>>> +
>>> +	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
>>> +	    test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
>>
>> You still need the nbd_put mutex part of the v3 patch don't you?
>>
>> nbd_dev_remove could call kfree(nbd) while we are accessing the nbd
>> device struct above.
>>
> 
> We're still holding the mutex here, so this is safe right?

Ah you are right for the memory issue. I think we will hit duplicate
sysfs entries errors though:

1. nbd_put takes the mutex and drops nbd->ref to 0. It then does
idr_remove and drops the mutex.

2. nbd_genl_connect takes the mutex. idr_find/idr_for_each fails to find
an existing device, so it does nbd_dev_add.

3. nbd_put now calls nbd_dev_remove, but nbd_dev_add is able to do
add_disk before nbd_dev_remove is able to do del_gendisk.

We don't use idr_alloc_cyclic so nbd_dev_add could probably get the id
we just freed, and we would get the duplicate sysfs entry error.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4 2/2] nbd: fix possible page fault for nbd disk
  2019-09-17 18:40     ` Josef Bacik
  2019-09-17 19:36       ` Mike Christie
@ 2019-09-17 19:44       ` Mike Christie
  2019-09-17 19:56         ` Josef Bacik
  1 sibling, 1 reply; 10+ messages in thread
From: Mike Christie @ 2019-09-17 19:44 UTC (permalink / raw)
  To: Josef Bacik; +Cc: xiubli, axboe, linux-block

On 09/17/2019 01:40 PM, Josef Bacik wrote:
>>> +		nbd->destroy_complete = &destroy_complete;
>>
>> Also, without the mutex part of the v3 patch, we could race and
>> nbd_dev_remove could have passed the destroy_complete check already, so
>> below we will wait forever.
>>
> 
> Oh hmm you're right,

I think I am actually wrong about that part too now :) I had forgot
about the idr removal under the mutex when making my original comment.

If nbd_put grabs the mutex first then it will do idr_remove under the
mutex. If nbd_genl_connect then runs, idr_find/idr_for_each will fail
and we will allocate a new nbd device and NBD_DISCONNECT_REQUESTED will
not be set.

If nbd_genl_connect grabs the mutex first, then idr_find/idr_for_each
will succeed and we will set the completion. nbd_put will then grab the
mutex and call nbd_remove_dev and see the completion.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4 2/2] nbd: fix possible page fault for nbd disk
  2019-09-17 19:44       ` Mike Christie
@ 2019-09-17 19:56         ` Josef Bacik
  0 siblings, 0 replies; 10+ messages in thread
From: Josef Bacik @ 2019-09-17 19:56 UTC (permalink / raw)
  To: Mike Christie; +Cc: Josef Bacik, xiubli, axboe, linux-block

On Tue, Sep 17, 2019 at 02:44:09PM -0500, Mike Christie wrote:
> On 09/17/2019 01:40 PM, Josef Bacik wrote:
> >>> +		nbd->destroy_complete = &destroy_complete;
> >>
> >> Also, without the mutex part of the v3 patch, we could race and
> >> nbd_dev_remove could have passed the destroy_complete check already, so
> >> below we will wait forever.
> >>
> > 
> > Oh hmm you're right,
> 
> I think I am actually wrong about that part too now :) I had forgot
> about the idr removal under the mutex when making my original comment.
> 
> If nbd_put grabs the mutex first then it will do idr_remove under the
> mutex. If nbd_genl_connect then runs, idr_find/idr_for_each will fail
> and we will allocate a new nbd device and NBD_DISCONNECT_REQUESTED will
> not be set.
> 
> If nbd_genl_connect grabs the mutex first, then idr_find/idr_for_each
> will succeed and we will set the completion. nbd_put will then grab the
> mutex and call nbd_remove_dev and see the completion.

Lol we're all wrong.  I had it in my head that complete() just did a set_bit()
so wait_on_completion() would not wait, but it does the x->done++/x->done--
thing, so cool we're good here.  Thanks,

Josef

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4 0/2] nbd: fix possible page fault for nbd disk
  2019-09-17 11:56 [PATCH v4 0/2] nbd: fix possible page fault for nbd disk xiubli
                   ` (2 preceding siblings ...)
  2019-09-17 18:15 ` [PATCH v4 0/2] " Josef Bacik
@ 2019-09-17 20:52 ` Jens Axboe
  3 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2019-09-17 20:52 UTC (permalink / raw)
  To: xiubli, josef; +Cc: mchristi, linux-block

On 9/17/19 5:56 AM, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> V3:
> - fix the case that when the NBD_CFLAG_DESTROY_ON_DISCONNECT bit is not set.
> - add "nbd: rename the runtime flags as NBD_RT_ prefixed"
> 
> V4:
> - Address the use after free bug from Mike's comments
> - This has been test for 3 days, works well.
> 
> 
> Xiubo Li (2):
>    nbd: rename the runtime flags as NBD_RT_ prefixed
>    nbd: fix possible page fault for nbd disk
> 
>   drivers/block/nbd.c | 108 +++++++++++++++++++++++++++++---------------
>   1 file changed, 72 insertions(+), 36 deletions(-)

Applied for 5.4, thanks.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2019-09-17 20:52 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-17 11:56 [PATCH v4 0/2] nbd: fix possible page fault for nbd disk xiubli
2019-09-17 11:56 ` [PATCH v4 1/2] nbd: rename the runtime flags as NBD_RT_ prefixed xiubli
2019-09-17 11:56 ` [PATCH v4 2/2] nbd: fix possible page fault for nbd disk xiubli
2019-09-17 18:31   ` Mike Christie
2019-09-17 18:40     ` Josef Bacik
2019-09-17 19:36       ` Mike Christie
2019-09-17 19:44       ` Mike Christie
2019-09-17 19:56         ` Josef Bacik
2019-09-17 18:15 ` [PATCH v4 0/2] " Josef Bacik
2019-09-17 20:52 ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).