All of lore.kernel.org
 help / color / mirror / Atom feed
* New Patch series for the UBD Driver
@ 2018-11-12 17:41 anton.ivanov
  2018-11-12 17:41 ` [PATCH 1/3] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
                   ` (3 more replies)
  0 siblings, 4 replies; 25+ messages in thread
From: anton.ivanov @ 2018-11-12 17:41 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch

This is the revised patchset for the ubd driver. Known issues:

1. If an error is returned to an operation different from READ/WRITE/SYNC
the block subsystem goes south finishing with a crash. I have tried everything
I can think of and whatever I could lift out of the loop and nbd driver. I do
not think that the problem is in the ubd driver. I am lost here, Jens can you
please give me a hand in handling the case when ubd_handler() gets an error in
reply to a discard.

2. The QEMU discard/fallocate test is not run-time and is rather flaky as it
tries to operate on stdin, not on a filesystem. Copying it to UML does not bring
a lot of value. There is an added complication that fallocate with
a length of zero is an EINVAL, so you actually have to wipe some data in the
target file for real for the test for fallocate availability to be robust. So
if there is the need for a runtime test it has to read/fallocate/write back
data. As the most common ubd format is a plain ext2/3/4 fs with no boot record
the offset zero and length of sizeof(int) are as safe as it can get. 
Unfortunately, less safe for other fs types, but there is little I can do there.

3. This, I think is somehow related to 1. Very heavy IO like for example
dd if=/dev/zero of=/testfile bs=1M count=16M will block memory allocations and
things will start dying on OOM. That started with 4.20-rc1+fixes, it did not
exist in 4.19.

[PATCH 1/3] um: Switch to block-mq constants in the UML UBD driver
[PATCH 2/3] um: Clean-up command processing in UML UBD driver
[PATCH 3/3] um: Add support for DISCARD in the UBD Driver

Best Regards,

A.

_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 1/3] um: Switch to block-mq constants in the UML UBD driver
  2018-11-12 17:41 New Patch series for the UBD Driver anton.ivanov
@ 2018-11-12 17:41 ` anton.ivanov
  2018-11-12 17:48   ` Jens Axboe
  2018-11-12 17:42 ` [PATCH 2/3] um: Clean-up command processing in " anton.ivanov
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 25+ messages in thread
From: anton.ivanov @ 2018-11-12 17:41 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Switch to block mq-constants for both commands and error codes.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/ubd_kern.c | 54 ++++++++++++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 28c40624bcb6..76cd21baac2b 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -43,11 +43,13 @@
 #include <os.h>
 #include "cow.h"
 
-enum ubd_req { UBD_READ, UBD_WRITE, UBD_FLUSH };
+#define UBD_SECTOR_SIZE (1 << 9)
+/* Max request size is determined by sector mask - 32K */
+#define UBD_MAX_REQUEST (8 * sizeof(long))
 
 struct io_thread_req {
 	struct request *req;
-	enum ubd_req op;
+	int op;
 	int fds[2];
 	unsigned long offsets[2];
 	unsigned long long offset;
@@ -511,16 +513,13 @@ static void ubd_handler(void)
 		}
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 			struct io_thread_req *io_req = (*irq_req_buffer)[count];
-			int err = io_req->error ? BLK_STS_IOERR : BLK_STS_OK;
 
-			if (!blk_update_request(io_req->req, err, io_req->length))
-				__blk_mq_end_request(io_req->req, err);
+			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
+				__blk_mq_end_request(io_req->req, io_req->error);
 
 			kfree(io_req);
 		}
 	}
-
-	reactivate_fd(thread_fd, UBD_IRQ);
 }
 
 static irqreturn_t ubd_intr(int irq, void *dev)
@@ -830,6 +829,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 		if(err < 0) goto error;
 		ubd_dev->cow.fd = err;
 	}
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 	return 0;
  error:
 	os_close_file(ubd_dev->fd);
@@ -882,7 +882,7 @@ static int ubd_disk_register(int major, u64 size, int unit,
 	return 0;
 }
 
-#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9))
+#define ROUND_BLOCK(n) ((n + (UBD_SECTOR_SIZE - 1)) & (-UBD_SECTOR_SIZE))
 
 static const struct blk_mq_ops ubd_mq_ops = {
 	.queue_rq = ubd_queue_rq,
@@ -1277,7 +1277,7 @@ static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
 	if(req->length > (sizeof(req->sector_mask) * 8) << 9)
 		panic("Operation too long");
 
-	if(req->op == UBD_READ) {
+	if (req_op(req->req) == REQ_OP_READ) {
 		for(i = 0; i < req->length >> 9; i++){
 			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
 				ubd_set_bit(i, (unsigned char *)
@@ -1307,15 +1307,12 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
 		io_req->fds[0] = dev->fd;
 	io_req->error = 0;
 
-	if (req_op(req) == REQ_OP_FLUSH) {
-		io_req->op = UBD_FLUSH;
-	} else {
+	if (req_op(req) != REQ_OP_FLUSH) {
 		io_req->fds[1] = dev->fd;
 		io_req->cow_offset = -1;
 		io_req->offset = off;
 		io_req->length = bvec->bv_len;
 		io_req->sector_mask = 0;
-		io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE;
 		io_req->offsets[0] = 0;
 		io_req->offsets[1] = dev->cow.data_offset;
 		io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
@@ -1413,22 +1410,37 @@ static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
 	return -EINVAL;
 }
 
+static int map_error(int error_code)
+{
+	switch (error_code) {
+	case 0:
+		return BLK_STS_OK;
+	case EOPNOTSUPP:
+		return BLK_STS_NOTSUPP;
+	case EPERM:
+		return BLK_STS_PROTECTION;
+	case ENOSPC:
+		return BLK_STS_NOSPC;
+	}
+	return BLK_STS_IOERR;
+}
+
 static int update_bitmap(struct io_thread_req *req)
 {
 	int n;
 
 	if(req->cow_offset == -1)
-		return 0;
+		return map_error(0);
 
 	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
 			  sizeof(req->bitmap_words), req->cow_offset);
 	if(n != sizeof(req->bitmap_words)){
 		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
 		       req->fds[1]);
-		return 1;
+		return map_error(-n);
 	}
 
-	return 0;
+	return map_error(0);
 }
 
 static void do_io(struct io_thread_req *req)
@@ -1438,13 +1450,13 @@ static void do_io(struct io_thread_req *req)
 	int n, nsectors, start, end, bit;
 	__u64 off;
 
-	if (req->op == UBD_FLUSH) {
+	if (req_op(req->req) == REQ_OP_FLUSH) {
 		/* fds[0] is always either the rw image or our cow file */
 		n = os_sync_file(req->fds[0]);
 		if (n != 0) {
 			printk("do_io - sync failed err = %d "
 			       "fd = %d\n", -n, req->fds[0]);
-			req->error = 1;
+			req->error = map_error(-n);
 		}
 		return;
 	}
@@ -1464,7 +1476,7 @@ static void do_io(struct io_thread_req *req)
 		len = (end - start) * req->sectorsize;
 		buf = &req->buffer[start * req->sectorsize];
 
-		if(req->op == UBD_READ){
+		if (req_op(req->req) == REQ_OP_READ) {
 			n = 0;
 			do {
 				buf = &buf[n];
@@ -1473,7 +1485,7 @@ static void do_io(struct io_thread_req *req)
 				if (n < 0) {
 					printk("do_io - read failed, err = %d "
 					       "fd = %d\n", -n, req->fds[bit]);
-					req->error = 1;
+					req->error = map_error(-n);
 					return;
 				}
 			} while((n < len) && (n != 0));
@@ -1483,7 +1495,7 @@ static void do_io(struct io_thread_req *req)
 			if(n != len){
 				printk("do_io - write failed err = %d "
 				       "fd = %d\n", -n, req->fds[bit]);
-				req->error = 1;
+				req->error = map_error(-n);
 				return;
 			}
 		}
-- 
2.11.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 2/3] um: Clean-up command processing in UML UBD driver
  2018-11-12 17:41 New Patch series for the UBD Driver anton.ivanov
  2018-11-12 17:41 ` [PATCH 1/3] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
@ 2018-11-12 17:42 ` anton.ivanov
  2018-11-12 17:42 ` [PATCH 3/3] um: Add support for DISCARD in the UBD Driver anton.ivanov
  2018-11-12 17:50 ` New Patch series for " Jens Axboe
  3 siblings, 0 replies; 25+ messages in thread
From: anton.ivanov @ 2018-11-12 17:42 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Clean-up command processing and return BLK_STS_NOTSUP for
uknown commands.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/ubd_kern.c | 64 ++++++++++++++++++++++++++++------------------
 1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 76cd21baac2b..08b5ba9a5340 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1307,21 +1307,25 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
 		io_req->fds[0] = dev->fd;
 	io_req->error = 0;
 
-	if (req_op(req) != REQ_OP_FLUSH) {
-		io_req->fds[1] = dev->fd;
-		io_req->cow_offset = -1;
-		io_req->offset = off;
+	if (bvec != NULL) {
 		io_req->length = bvec->bv_len;
-		io_req->sector_mask = 0;
-		io_req->offsets[0] = 0;
-		io_req->offsets[1] = dev->cow.data_offset;
 		io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
-		io_req->sectorsize = 1 << 9;
+	} else {
+		io_req->buffer = NULL;
+		io_req->length = blk_rq_bytes(req);
+	}
 
-		if (dev->cow.file) {
-			cowify_req(io_req, dev->cow.bitmap,
-				   dev->cow.bitmap_offset, dev->cow.bitmap_len);
-		}
+	io_req->sectorsize = UBD_SECTOR_SIZE;
+	io_req->fds[1] = dev->fd;
+	io_req->cow_offset = -1;
+	io_req->offset = off;
+	io_req->sector_mask = 0;
+	io_req->offsets[0] = 0;
+	io_req->offsets[1] = dev->cow.data_offset;
+
+	if (dev->cow.file) {
+		cowify_req(io_req, dev->cow.bitmap,
+			   dev->cow.bitmap_offset, dev->cow.bitmap_len);
 	}
 
 	ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
@@ -1330,7 +1334,6 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
 			pr_err("write to io thread failed: %d\n", -ret);
 		kfree(io_req);
 	}
-
 	return ret;
 }
 
@@ -1344,20 +1347,31 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_mq_start_request(req);
 
 	spin_lock_irq(&ubd_dev->lock);
-
-	if (req_op(req) == REQ_OP_FLUSH) {
+	switch (req_op(req)) {
+	/* operations with no lentgth/offset arguments */
+	case REQ_OP_FLUSH:
 		ret = ubd_queue_one_vec(hctx, req, 0, NULL);
-	} else {
-		struct req_iterator iter;
-		struct bio_vec bvec;
-		u64 off = (u64)blk_rq_pos(req) << 9;
-
-		rq_for_each_segment(bvec, req, iter) {
-			ret = ubd_queue_one_vec(hctx, req, off, &bvec);
-			if (ret < 0)
-				goto out;
-			off += bvec.bv_len;
+		break;
+	/* operations with bio_vec arguments */
+	case REQ_OP_READ:
+	case REQ_OP_WRITE:
+		{
+			struct req_iterator iter;
+			struct bio_vec bvec;
+			u64 off = (u64)blk_rq_pos(req) << 9;
+
+			rq_for_each_segment(bvec, req, iter) {
+				ret = ubd_queue_one_vec(hctx, req, off, &bvec);
+				if (ret < 0)
+					goto out;
+				off += bvec.bv_len;
+			}
 		}
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		spin_unlock_irq(&ubd_dev->lock);
+		return BLK_STS_NOTSUPP;
 	}
 out:
 	spin_unlock_irq(&ubd_dev->lock);
-- 
2.11.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 3/3] um: Add support for DISCARD in the UBD Driver
  2018-11-12 17:41 New Patch series for the UBD Driver anton.ivanov
  2018-11-12 17:41 ` [PATCH 1/3] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
  2018-11-12 17:42 ` [PATCH 2/3] um: Clean-up command processing in " anton.ivanov
@ 2018-11-12 17:42 ` anton.ivanov
  2018-11-12 17:50 ` New Patch series for " Jens Axboe
  3 siblings, 0 replies; 25+ messages in thread
From: anton.ivanov @ 2018-11-12 17:42 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Support DISCARD and WRITE_ZEROES in the ubd driver using
fallocate.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/Kconfig     | 22 ++++++++++++++
 arch/um/drivers/ubd_kern.c  | 71 +++++++++++++++++++++++++++++++++++++++------
 arch/um/include/shared/os.h |  2 ++
 arch/um/os-Linux/file.c     |  8 +++++
 4 files changed, 94 insertions(+), 9 deletions(-)

diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
index 2b1aaf7755aa..a3407cca6bf9 100644
--- a/arch/um/drivers/Kconfig
+++ b/arch/um/drivers/Kconfig
@@ -1,5 +1,27 @@
 # SPDX-License-Identifier: GPL-2.0
 
+menu "UML Block Devices"
+
+config BLK_DEV_UBD
+	bool "UBD Block Device"
+	default y
+	help
+          User Mode Linux virtual block device driver
+
+config BLK_DEV_UBD_SYNC
+	bool "Use Synchronous mode for UBD"
+	default n
+	help
+          Perform all disk operations synchronously (extremely slow).
+
+config BLK_DEV_UBD_DISCARD
+	bool "Enable DISCARD/TRIM support in UBD"
+	default y
+	help
+          Enable discard/trim support. Requires host kernel 3.x or above and
+	  may not be supported on all host filesystems.
+endmenu
+
 menu "UML Character Devices"
 
 config STDERR_CONSOLE
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 08b5ba9a5340..791d637ad4ef 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -514,9 +514,13 @@ static void ubd_handler(void)
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 			struct io_thread_req *io_req = (*irq_req_buffer)[count];
 
-			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
-				__blk_mq_end_request(io_req->req, io_req->error);
-
+			if ((io_req->error) || (io_req->buffer == NULL)) {
+				blk_mq_end_request(io_req->req, io_req->error);
+			} else {
+				if (!blk_update_request(io_req->req, io_req->error, io_req->length)) {
+					__blk_mq_end_request(io_req->req, io_req->error);
+				}
+			}
 			kfree(io_req);
 		}
 	}
@@ -775,6 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 	char **back_ptr;
 	int err, create_cow, *create_ptr;
 	int fd;
+	int data;
 
 	ubd_dev->openflags = ubd_dev->boot_openflags;
 	create_cow = 0;
@@ -829,6 +834,23 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 		if(err < 0) goto error;
 		ubd_dev->cow.fd = err;
 	}
+#ifdef CONFIG_BLK_DEV_UBD_DISCARD
+	if (ubd_dev->cow.file != NULL)
+		fd = ubd_dev->cow.fd;
+	else
+		fd = ubd_dev->fd;
+	err = os_pread_file(fd, &data, sizeof(int), 0);
+	if (err == sizeof(int)) {
+		err = os_falloc_punch(fd, 0, sizeof(int));
+		if (err == 0) {
+			ubd_dev->queue->limits.discard_granularity = UBD_SECTOR_SIZE;
+			ubd_dev->queue->limits.discard_alignment = UBD_SECTOR_SIZE;
+			blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
+			blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
+			os_pwrite_file(fd, &data, sizeof(int), 0);
+		}
+	}
+#endif
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 	return 0;
  error:
@@ -1368,6 +1390,12 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 			}
 		}
 		break;
+#ifdef CONFIG_BLK_DEV_UBD_DISCARD
+	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
+		ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
+		break;
+#endif
 	default:
 		WARN_ON_ONCE(1);
 		spin_unlock_irq(&ubd_dev->lock);
@@ -1429,6 +1457,7 @@ static int map_error(int error_code)
 	switch (error_code) {
 	case 0:
 		return BLK_STS_OK;
+	case ENOSYS:
 	case EOPNOTSUPP:
 		return BLK_STS_NOTSUPP;
 	case EPERM:
@@ -1459,11 +1488,13 @@ static int update_bitmap(struct io_thread_req *req)
 
 static void do_io(struct io_thread_req *req)
 {
-	char *buf;
+	char *buf = NULL;
 	unsigned long len;
 	int n, nsectors, start, end, bit;
 	__u64 off;
 
+	/* FLUSH is really a special case, we cannot "case" it with others */
+
 	if (req_op(req->req) == REQ_OP_FLUSH) {
 		/* fds[0] is always either the rw image or our cow file */
 		n = os_sync_file(req->fds[0]);
@@ -1477,6 +1508,7 @@ static void do_io(struct io_thread_req *req)
 
 	nsectors = req->length / req->sectorsize;
 	start = 0;
+
 	do {
 		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
 		end = start;
@@ -1488,9 +1520,11 @@ static void do_io(struct io_thread_req *req)
 		off = req->offset + req->offsets[bit] +
 			start * req->sectorsize;
 		len = (end - start) * req->sectorsize;
-		buf = &req->buffer[start * req->sectorsize];
+		if (req->buffer != NULL)
+			buf = &req->buffer[start * req->sectorsize];
 
-		if (req_op(req->req) == REQ_OP_READ) {
+		switch (req_op(req->req)) {
+		case REQ_OP_READ:
 			n = 0;
 			do {
 				buf = &buf[n];
@@ -1504,7 +1538,8 @@ static void do_io(struct io_thread_req *req)
 				}
 			} while((n < len) && (n != 0));
 			if (n < len) memset(&buf[n], 0, len - n);
-		} else {
+			break;
+		case REQ_OP_WRITE:
 			n = os_pwrite_file(req->fds[bit], buf, len, off);
 			if(n != len){
 				printk("do_io - write failed err = %d "
@@ -1512,12 +1547,30 @@ static void do_io(struct io_thread_req *req)
 				req->error = map_error(-n);
 				return;
 			}
+			break;
+#ifdef CONFIG_BLK_DEV_UBD_DISCARD
+		case REQ_OP_DISCARD:
+		case REQ_OP_WRITE_ZEROES:
+			n = os_falloc_punch(req->fds[bit], off, len);
+			if (n) {
+				printk("do_io - discard failed err = %d "
+				       "fd = %d\n", -n, req->fds[bit]);
+				if (n == -EOPNOTSUPP)
+					req->error = map_error(-n);
+				return;
+			}
+			break;
+#endif
+		default:
+			WARN_ON_ONCE(1);
+			req->error = BLK_STS_NOTSUPP;
+			return;
 		}
-
 		start = end;
 	} while(start < nsectors);
 
-	req->error = update_bitmap(req);
+	if (req_op(req->req) != REQ_OP_READ)
+		req->error = update_bitmap(req);
 }
 
 /* Changed in start_io_thread, which is serialized by being called only
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 048ae37eb5aa..9aa4b150e0a9 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -175,6 +175,8 @@ extern int os_fchange_dir(int fd);
 extern unsigned os_major(unsigned long long dev);
 extern unsigned os_minor(unsigned long long dev);
 extern unsigned long long os_makedev(unsigned major, unsigned minor);
+extern int os_falloc_punch(int fd, unsigned long long offset, int count);
+
 
 /* start_up.c */
 extern void os_early_checks(void);
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index c0197097c86e..56e921ff3ff3 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -301,6 +301,14 @@ int os_pwrite_file(int fd, const void *buf, int len, unsigned long long offset)
 	return n;
 }
 
+int os_falloc_punch(int fd, unsigned long long offset, int len)
+{
+	int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
 
 int os_file_size(const char *file, unsigned long long *size_out)
 {
-- 
2.11.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/3] um: Switch to block-mq constants in the UML UBD driver
  2018-11-12 17:41 ` [PATCH 1/3] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
@ 2018-11-12 17:48   ` Jens Axboe
  2018-11-12 17:52     ` Anton Ivanov
  0 siblings, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2018-11-12 17:48 UTC (permalink / raw)
  To: anton.ivanov, linux-um; +Cc: richard, hch

On 11/12/18 10:41 AM, anton.ivanov@cambridgegreys.com wrote:
> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
> index 28c40624bcb6..76cd21baac2b 100644
> --- a/arch/um/drivers/ubd_kern.c
> +++ b/arch/um/drivers/ubd_kern.c
> @@ -43,11 +43,13 @@
>  #include <os.h>
>  #include "cow.h"
>  
> -enum ubd_req { UBD_READ, UBD_WRITE, UBD_FLUSH };
> +#define UBD_SECTOR_SIZE (1 << 9)
> +/* Max request size is determined by sector mask - 32K */
> +#define UBD_MAX_REQUEST (8 * sizeof(long))
>  
>  struct io_thread_req {
>  	struct request *req;
> -	enum ubd_req op;
> +	int op;

You're still adding an op field, even though you did take the suggestion
to use req_op(io_req->req) instead?

> @@ -511,16 +513,13 @@ static void ubd_handler(void)
>  		}
>  		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>  			struct io_thread_req *io_req = (*irq_req_buffer)[count];
> -			int err = io_req->error ? BLK_STS_IOERR : BLK_STS_OK;
>  
> -			if (!blk_update_request(io_req->req, err, io_req->length))
> -				__blk_mq_end_request(io_req->req, err);
> +			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
> +				__blk_mq_end_request(io_req->req, io_req->error);
>  
>  			kfree(io_req);
>  		}
>  	}
> -
> -	reactivate_fd(thread_fd, UBD_IRQ);
>  }

That last change looks unrelated. It may make sense, but it should not be
mixed in with this patch.

> @@ -1413,22 +1410,37 @@ static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
>  	return -EINVAL;
>  }
>  
> +static int map_error(int error_code)
> +{
> +	switch (error_code) {
> +	case 0:
> +		return BLK_STS_OK;
> +	case EOPNOTSUPP:
> +		return BLK_STS_NOTSUPP;
> +	case EPERM:
> +		return BLK_STS_PROTECTION;
> +	case ENOSPC:
> +		return BLK_STS_NOSPC;
> +	}
> +	return BLK_STS_IOERR;
> +}
> +

BLK_STS_PROTECTION is not the right one to use here, that's for data
integrity issues related to DIF/DIX.


-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-12 17:41 New Patch series for the UBD Driver anton.ivanov
                   ` (2 preceding siblings ...)
  2018-11-12 17:42 ` [PATCH 3/3] um: Add support for DISCARD in the UBD Driver anton.ivanov
@ 2018-11-12 17:50 ` Jens Axboe
  2018-11-12 18:00   ` Anton Ivanov
  3 siblings, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2018-11-12 17:50 UTC (permalink / raw)
  To: anton.ivanov, linux-um; +Cc: richard, hch

On 11/12/18 10:41 AM, anton.ivanov@cambridgegreys.com wrote:
> This is the revised patchset for the ubd driver. Known issues:
> 
> 1. If an error is returned to an operation different from READ/WRITE/SYNC
> the block subsystem goes south finishing with a crash. I have tried everything
> I can think of and whatever I could lift out of the loop and nbd driver. I do
> not think that the problem is in the ubd driver. I am lost here, Jens can you
> please give me a hand in handling the case when ubd_handler() gets an error in
> reply to a discard.

It would help if you showed a crash from doing that, and an example of
how you are doing that.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/3] um: Switch to block-mq constants in the UML UBD driver
  2018-11-12 17:48   ` Jens Axboe
@ 2018-11-12 17:52     ` Anton Ivanov
  0 siblings, 0 replies; 25+ messages in thread
From: Anton Ivanov @ 2018-11-12 17:52 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch


On 11/12/18 5:48 PM, Jens Axboe wrote:
> On 11/12/18 10:41 AM, anton.ivanov@cambridgegreys.com wrote:
>> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
>> index 28c40624bcb6..76cd21baac2b 100644
>> --- a/arch/um/drivers/ubd_kern.c
>> +++ b/arch/um/drivers/ubd_kern.c
>> @@ -43,11 +43,13 @@
>>   #include <os.h>
>>   #include "cow.h"
>>   
>> -enum ubd_req { UBD_READ, UBD_WRITE, UBD_FLUSH };
>> +#define UBD_SECTOR_SIZE (1 << 9)
>> +/* Max request size is determined by sector mask - 32K */
>> +#define UBD_MAX_REQUEST (8 * sizeof(long))
>>   
>>   struct io_thread_req {
>>   	struct request *req;
>> -	enum ubd_req op;
>> +	int op;
> You're still adding an op field, even though you did take the suggestion
> to use req_op(io_req->req) instead?

Oops... sorry... missed it.

>> @@ -511,16 +513,13 @@ static void ubd_handler(void)
>>   		}
>>   		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>>   			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>> -			int err = io_req->error ? BLK_STS_IOERR : BLK_STS_OK;
>>   
>> -			if (!blk_update_request(io_req->req, err, io_req->length))
>> -				__blk_mq_end_request(io_req->req, err);
>> +			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
>> +				__blk_mq_end_request(io_req->req, io_req->error);
>>   
>>   			kfree(io_req);
>>   		}
>>   	}
>> -
>> -	reactivate_fd(thread_fd, UBD_IRQ);
>>   }
> That last change looks unrelated. It may make sense, but it should not be
> mixed in with this patch.

Ack - reactivate_fd() is a NOOP now after UML got a new IRQ controller.

Killing it can go into a separate patch.

>
>> @@ -1413,22 +1410,37 @@ static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
>>   	return -EINVAL;
>>   }
>>   
>> +static int map_error(int error_code)
>> +{
>> +	switch (error_code) {
>> +	case 0:
>> +		return BLK_STS_OK;
>> +	case EOPNOTSUPP:
>> +		return BLK_STS_NOTSUPP;
>> +	case EPERM:
>> +		return BLK_STS_PROTECTION;
>> +	case ENOSPC:
>> +		return BLK_STS_NOSPC;
>> +	}
>> +	return BLK_STS_IOERR;
>> +}
>> +
> BLK_STS_PROTECTION is not the right one to use here, that's for data
> integrity issues related to DIF/DIX.

Understood.

>
>
-- 
Anton R. Ivanov

Cambridge Greys Limited, England and Wales company No 10273661
http://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-12 17:50 ` New Patch series for " Jens Axboe
@ 2018-11-12 18:00   ` Anton Ivanov
  2018-11-12 18:04     ` Jens Axboe
  0 siblings, 1 reply; 25+ messages in thread
From: Anton Ivanov @ 2018-11-12 18:00 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch


On 11/12/18 5:50 PM, Jens Axboe wrote:
> On 11/12/18 10:41 AM, anton.ivanov@cambridgegreys.com wrote:
>> This is the revised patchset for the ubd driver. Known issues:
>>
>> 1. If an error is returned to an operation different from READ/WRITE/SYNC
>> the block subsystem goes south finishing with a crash. I have tried everything
>> I can think of and whatever I could lift out of the loop and nbd driver. I do
>> not think that the problem is in the ubd driver. I am lost here, Jens can you
>> please give me a hand in handling the case when ubd_handler() gets an error in
>> reply to a discard.
> It would help if you showed a crash from doing that, and an example of
> how you are doing that.

Test case - 1G ext4 image on MS DOS filesystem.

With the present patch, the logic around line 844 should be commented 
out so that DISCARD is always enabled. I can send a patch where this is 
always enabled for testing purposes.

Mount filesystem - mount /dev/ubdb /mnt

Try fstrim /mnt

Crash is immediate,  but different in each case.

Some cases contain the WARN from line 439 in blk.h . Some contain 
spinlock recursion BUG(). Some do not contain anything - it just 
crashes. Or hangs.

I tried various combinations (and code theft from loop and nbd) of 
blk_update_request, __blk_mq_end_request, blk_mq_end_request, etc - the 
flavor of the crash changes a bit, but not the essence.

Firsts question - is any of that expected to be invoked out of an 
interrupt handler in the first place?

A.
>
-- 
Anton R. Ivanov

Cambridge Greys Limited, England and Wales company No 10273661
http://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-12 18:00   ` Anton Ivanov
@ 2018-11-12 18:04     ` Jens Axboe
  2018-11-12 18:12       ` Anton Ivanov
  0 siblings, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2018-11-12 18:04 UTC (permalink / raw)
  To: Anton Ivanov, linux-um; +Cc: richard, hch

On 11/12/18 11:00 AM, Anton Ivanov wrote:
> 
> On 11/12/18 5:50 PM, Jens Axboe wrote:
>> On 11/12/18 10:41 AM, anton.ivanov@cambridgegreys.com wrote:
>>> This is the revised patchset for the ubd driver. Known issues:
>>>
>>> 1. If an error is returned to an operation different from READ/WRITE/SYNC
>>> the block subsystem goes south finishing with a crash. I have tried everything
>>> I can think of and whatever I could lift out of the loop and nbd driver. I do
>>> not think that the problem is in the ubd driver. I am lost here, Jens can you
>>> please give me a hand in handling the case when ubd_handler() gets an error in
>>> reply to a discard.
>> It would help if you showed a crash from doing that, and an example of
>> how you are doing that.
> 
> Test case - 1G ext4 image on MS DOS filesystem.
> 
> With the present patch, the logic around line 844 should be commented 
> out so that DISCARD is always enabled. I can send a patch where this is 
> always enabled for testing purposes.
> 
> Mount filesystem - mount /dev/ubdb /mnt
> 
> Try fstrim /mnt
> 
> Crash is immediate,  but different in each case.
> 
> Some cases contain the WARN from line 439 in blk.h . Some contain 
> spinlock recursion BUG(). Some do not contain anything - it just 
> crashes. Or hangs.
> 
> I tried various combinations (and code theft from loop and nbd) of 
> blk_update_request, __blk_mq_end_request, blk_mq_end_request, etc - the 
> flavor of the crash changes a bit, but not the essence.

Sorry, all of this isn't very clear to me. It definitely sounds like
you are doing something wrong. Where are you calling falloc_punch()
from? It's not even safe from ->queue_rq(), you'd need to add
BLK_MQ_F_BLOCKING to your mq_ops flags to ensure that blk-mq
always calls you from a context that can block. If you're doing
it from the irq handler, well then...

> Firsts question - is any of that expected to be invoked out of an 
> interrupt handler in the first place?

What is "that" here?

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-12 18:04     ` Jens Axboe
@ 2018-11-12 18:12       ` Anton Ivanov
  2018-11-12 18:14         ` Jens Axboe
  0 siblings, 1 reply; 25+ messages in thread
From: Anton Ivanov @ 2018-11-12 18:12 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch


On 11/12/18 6:04 PM, Jens Axboe wrote:
> On 11/12/18 11:00 AM, Anton Ivanov wrote:
>> On 11/12/18 5:50 PM, Jens Axboe wrote:
>>> On 11/12/18 10:41 AM, anton.ivanov@cambridgegreys.com wrote:
>>>> This is the revised patchset for the ubd driver. Known issues:
>>>>
>>>> 1. If an error is returned to an operation different from READ/WRITE/SYNC
>>>> the block subsystem goes south finishing with a crash. I have tried everything
>>>> I can think of and whatever I could lift out of the loop and nbd driver. I do
>>>> not think that the problem is in the ubd driver. I am lost here, Jens can you
>>>> please give me a hand in handling the case when ubd_handler() gets an error in
>>>> reply to a discard.
>>> It would help if you showed a crash from doing that, and an example of
>>> how you are doing that.
>> Test case - 1G ext4 image on MS DOS filesystem.
>>
>> With the present patch, the logic around line 844 should be commented
>> out so that DISCARD is always enabled. I can send a patch where this is
>> always enabled for testing purposes.
>>
>> Mount filesystem - mount /dev/ubdb /mnt
>>
>> Try fstrim /mnt
>>
>> Crash is immediate,  but different in each case.
>>
>> Some cases contain the WARN from line 439 in blk.h . Some contain
>> spinlock recursion BUG(). Some do not contain anything - it just
>> crashes. Or hangs.
>>
>> I tried various combinations (and code theft from loop and nbd) of
>> blk_update_request, __blk_mq_end_request, blk_mq_end_request, etc - the
>> flavor of the crash changes a bit, but not the essence.
> Sorry, all of this isn't very clear to me. It definitely sounds like
> you are doing something wrong. Where are you calling falloc_punch()
> from?

 From the IO thread. It sets the error code and writes to the IPC pipe 
which triggers an interrupt. Same as with any other disk IO in UML/UBD.

> It's not even safe from ->queue_rq(), you'd need to add
> BLK_MQ_F_BLOCKING to your mq_ops flags to ensure that blk-mq
> always calls you from a context that can block. If you're doing
> it from the irq handler, well then...

The only thing the IRQ handler does is to process the completion request.

That works OK for requests which have no error.

It also works fine (no crashes) for requests which return an error to 
the FSYNC call. I did not do proper exhaustive testing with error 
injection, but based on cursory testing read/write error handling also 
seems to work (do not take my word for it, I will write a proper "error 
injection" harness tomorrow to retest that).

The issue shows immediately when processing the completion of a DISCARD 
request which has an error set. DISCARD requests with no error set are fine.

A.

>
>> Firsts question - is any of that expected to be invoked out of an
>> interrupt handler in the first place?
> What is "that" here?
>
-- 
Anton R. Ivanov

Cambridge Greys Limited, England and Wales company No 10273661
http://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-12 18:12       ` Anton Ivanov
@ 2018-11-12 18:14         ` Jens Axboe
  2018-11-12 18:23           ` Anton Ivanov
  0 siblings, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2018-11-12 18:14 UTC (permalink / raw)
  To: Anton Ivanov, linux-um; +Cc: richard, hch

On 11/12/18 11:12 AM, Anton Ivanov wrote:
> 
> On 11/12/18 6:04 PM, Jens Axboe wrote:
>> On 11/12/18 11:00 AM, Anton Ivanov wrote:
>>> On 11/12/18 5:50 PM, Jens Axboe wrote:
>>>> On 11/12/18 10:41 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>> This is the revised patchset for the ubd driver. Known issues:
>>>>>
>>>>> 1. If an error is returned to an operation different from READ/WRITE/SYNC
>>>>> the block subsystem goes south finishing with a crash. I have tried everything
>>>>> I can think of and whatever I could lift out of the loop and nbd driver. I do
>>>>> not think that the problem is in the ubd driver. I am lost here, Jens can you
>>>>> please give me a hand in handling the case when ubd_handler() gets an error in
>>>>> reply to a discard.
>>>> It would help if you showed a crash from doing that, and an example of
>>>> how you are doing that.
>>> Test case - 1G ext4 image on MS DOS filesystem.
>>>
>>> With the present patch, the logic around line 844 should be commented
>>> out so that DISCARD is always enabled. I can send a patch where this is
>>> always enabled for testing purposes.
>>>
>>> Mount filesystem - mount /dev/ubdb /mnt
>>>
>>> Try fstrim /mnt
>>>
>>> Crash is immediate,  but different in each case.
>>>
>>> Some cases contain the WARN from line 439 in blk.h . Some contain
>>> spinlock recursion BUG(). Some do not contain anything - it just
>>> crashes. Or hangs.
>>>
>>> I tried various combinations (and code theft from loop and nbd) of
>>> blk_update_request, __blk_mq_end_request, blk_mq_end_request, etc - the
>>> flavor of the crash changes a bit, but not the essence.
>> Sorry, all of this isn't very clear to me. It definitely sounds like
>> you are doing something wrong. Where are you calling falloc_punch()
>> from?
> 
>  From the IO thread. It sets the error code and writes to the IPC pipe 
> which triggers an interrupt. Same as with any other disk IO in UML/UBD.
> 
>> It's not even safe from ->queue_rq(), you'd need to add
>> BLK_MQ_F_BLOCKING to your mq_ops flags to ensure that blk-mq
>> always calls you from a context that can block. If you're doing
>> it from the irq handler, well then...
> 
> The only thing the IRQ handler does is to process the completion request.
> 
> That works OK for requests which have no error.

Note the warning above on blocking from ->queue()rq...

> It also works fine (no crashes) for requests which return an error to 
> the FSYNC call. I did not do proper exhaustive testing with error 
> injection, but based on cursory testing read/write error handling also 
> seems to work (do not take my word for it, I will write a proper "error 
> injection" harness tomorrow to retest that).
> 
> The issue shows immediately when processing the completion of a DISCARD 
> request which has an error set. DISCARD requests with no error set are fine.

Please share some of the crashes, I can't help you with this without
having something concrete.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-12 18:14         ` Jens Axboe
@ 2018-11-12 18:23           ` Anton Ivanov
  2018-11-12 18:24             ` Anton Ivanov
  2018-11-12 18:28             ` Jens Axboe
  0 siblings, 2 replies; 25+ messages in thread
From: Anton Ivanov @ 2018-11-12 18:23 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch


On 11/12/18 6:14 PM, Jens Axboe wrote:
> On 11/12/18 11:12 AM, Anton Ivanov wrote:
>> On 11/12/18 6:04 PM, Jens Axboe wrote:
>>> On 11/12/18 11:00 AM, Anton Ivanov wrote:
>>>> On 11/12/18 5:50 PM, Jens Axboe wrote:
>>>>> On 11/12/18 10:41 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>>> This is the revised patchset for the ubd driver. Known issues:
>>>>>>
>>>>>> 1. If an error is returned to an operation different from READ/WRITE/SYNC
>>>>>> the block subsystem goes south finishing with a crash. I have tried everything
>>>>>> I can think of and whatever I could lift out of the loop and nbd driver. I do
>>>>>> not think that the problem is in the ubd driver. I am lost here, Jens can you
>>>>>> please give me a hand in handling the case when ubd_handler() gets an error in
>>>>>> reply to a discard.
>>>>> It would help if you showed a crash from doing that, and an example of
>>>>> how you are doing that.
>>>> Test case - 1G ext4 image on MS DOS filesystem.
>>>>
>>>> With the present patch, the logic around line 844 should be commented
>>>> out so that DISCARD is always enabled. I can send a patch where this is
>>>> always enabled for testing purposes.
>>>>
>>>> Mount filesystem - mount /dev/ubdb /mnt
>>>>
>>>> Try fstrim /mnt
>>>>
>>>> Crash is immediate,  but different in each case.
>>>>
>>>> Some cases contain the WARN from line 439 in blk.h . Some contain
>>>> spinlock recursion BUG(). Some do not contain anything - it just
>>>> crashes. Or hangs.
>>>>
>>>> I tried various combinations (and code theft from loop and nbd) of
>>>> blk_update_request, __blk_mq_end_request, blk_mq_end_request, etc - the
>>>> flavor of the crash changes a bit, but not the essence.
>>> Sorry, all of this isn't very clear to me. It definitely sounds like
>>> you are doing something wrong. Where are you calling falloc_punch()
>>> from?
>>   From the IO thread. It sets the error code and writes to the IPC pipe
>> which triggers an interrupt. Same as with any other disk IO in UML/UBD.
>>
>>> It's not even safe from ->queue_rq(), you'd need to add
>>> BLK_MQ_F_BLOCKING to your mq_ops flags to ensure that blk-mq
>>> always calls you from a context that can block. If you're doing
>>> it from the irq handler, well then...
>> The only thing the IRQ handler does is to process the completion request.
>>
>> That works OK for requests which have no error.
> Note the warning above on blocking from ->queue()rq...

This is already of help. The closest to UML UBD is NBD. The difference 
is that there you talk to a remote server, while in UML you talk to an 
IO thread which does all the IO.

NBD already has MQ_F_BLOCKING enabled, I will have a look at it in more 
detail.

>
>> It also works fine (no crashes) for requests which return an error to
>> the FSYNC call. I did not do proper exhaustive testing with error
>> injection, but based on cursory testing read/write error handling also
>> seems to work (do not take my word for it, I will write a proper "error
>> injection" harness tomorrow to retest that).
>>
>> The issue shows immediately when processing the completion of a DISCARD
>> request which has an error set. DISCARD requests with no error set are fine.
> Please share some of the crashes, I can't help you with this without
> having something concrete.

root@stretch-opx:~# fstrim /mnt
[  135.160000] do_io - discard failed err = 95 fd = 23
[  135.160000] do_io - discard failed err = 95 fd = 23
[  135.160000]  [<600850f0>] ? prepare_to_wait+0x0/0x80
[  135.170000] ------------[ cut here ]------------
[  135.170000] WARNING: CPU: 0 PID: 1151 at block/blk.h:439 
generic_make_request_checks+0x3b5/0x680
[  135.170000] Modules linked in:
[  135.170000] CPU: 0 PID: 1151 Comm: fstrim Tainted: G W         
4.20.0-rc1-00005-g046844eea47e-dirty #121
[  135.170000] Stack:
[  135.170000]  d3821740 60744258 d3821788 00000000
[  135.170000]  00000009 00000000 d3821750 607442ac
[  135.170000]  d38217c0 6005034e d3821800 600e5902
[  135.170000] Call Trace:
[  135.170000]  [<603ffec5>] ? generic_make_request_checks+0x3b5/0x680
[  135.170000]  [<6009271a>] ? printk+0x0/0x94
[  135.170000]  [<6002a849>] show_stack+0x129/0x190
[  135.170000]  [<60744258>] ? dump_stack_print_info+0xe8/0x100
[  135.170000]  [<607442ac>] dump_stack+0x2a/0x2e
[  135.170000]  [<6005034e>] __warn+0x13e/0x170
[  135.170000]  [<600e5902>] ? mempool_alloc+0x92/0x1e0
[  135.170000]  [<600504f6>] warn_slowpath_null+0x46/0x4b
[  135.170000]  [<603ffec5>] generic_make_request_checks+0x3b5/0x680
[  135.170000]  [<603f9783>] ? bio_alloc_bioset+0x193/0x320
[  135.170000]  [<60044ef0>] ? set_signals+0x0/0x50
[  135.170000]  [<60044ef0>] ? set_signals+0x0/0x50
[  135.170000]  [<6040118f>] generic_make_request+0xdf/0x480
[  135.170000]  [<603f92d6>] ? __bio_clone_fast+0xd6/0xe0
[  135.170000]  [<603f9944>] ? bio_clone_fast+0x34/0x40
[  135.170000]  [<6040961e>] blk_queue_split+0x59e/0x620
[  135.170000]  [<60044ef0>] ? set_signals+0x0/0x50
[  135.170000]  [<6040f84a>] blk_mq_make_request+0x7a/0x400
[  135.170000]  [<600e5902>] ? mempool_alloc+0x92/0x1e0
[  135.170000]  [<603fae90>] ? bio_endio+0x0/0x130
[  135.170000]  [<604014cd>] generic_make_request+0x41d/0x480
[  135.170000]  [<6040170f>] submit_bio+0x1df/0x1f0
[  135.170000]  [<603f8348>] submit_bio_wait+0x68/0x90
[  135.170000]  [<6040bd22>] blkdev_issue_discard+0x72/0xb0
[  135.170000]  [<602174f0>] ext4_trim_all_free+0x350/0x680
[  135.170000]  [<607ccc90>] ? _raw_spin_unlock+0x0/0x20
[  135.170000]  [<6002ce80>] ? copy_chunk_from_user+0x0/0x30
[  135.170000]  [<6021b5af>] ext4_trim_fs+0x1cf/0x270
[  135.170000]  [<602109cf>] ext4_ioctl+0x1cbf/0x1f40
[  135.170000]  [<60152e08>] ? vfs_getattr_nosec+0x68/0x90
[  135.170000]  [<60153163>] ? cp_new_stat+0x163/0x180
[  135.170000]  [<60165b18>] do_vfs_ioctl+0x798/0xa40
[  135.170000]  [<60165e23>] ksys_ioctl+0x63/0xa0
[  135.170000]  [<6078def0>] ? ptrace+0x0/0xa0
[  135.170000]  [<60165e70>] sys_ioctl+0x10/0x20
[  135.170000]  [<6002ce58>] handle_syscall+0x98/0xc0
[  135.170000]  [<6075be10>] ? __waitpid+0x0/0xa0
[  135.170000]  [<600482d6>] userspace+0x4c6/0x5b0
[  135.170000]  [<600438af>] ? save_registers+0x1f/0x50
[  135.170000]  [<6004b747>] ? arch_prctl+0x117/0x1c0
[  135.170000]  [<6005c587>] ? calculate_sigpending+0x77/0x80
[  135.170000]  [<60029204>] fork_handler+0x94/0xa0
[  135.170000]
[  135.170000] ---[ end trace 24ca562b91e6dbdf ]---

And hang. Let me try a few more times to see if I can get the spinlock 
recursion and/or abort instead of hang case (they are more difficult to 
reproduce).

>
-- 
Anton R. Ivanov

Cambridge Greys Limited, England and Wales company No 10273661
http://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-12 18:23           ` Anton Ivanov
@ 2018-11-12 18:24             ` Anton Ivanov
  2018-11-12 18:28             ` Jens Axboe
  1 sibling, 0 replies; 25+ messages in thread
From: Anton Ivanov @ 2018-11-12 18:24 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch


On 11/12/18 6:23 PM, Anton Ivanov wrote:
>
> On 11/12/18 6:14 PM, Jens Axboe wrote:
>> On 11/12/18 11:12 AM, Anton Ivanov wrote:
>>> On 11/12/18 6:04 PM, Jens Axboe wrote:
>>>> On 11/12/18 11:00 AM, Anton Ivanov wrote:
>>>>> On 11/12/18 5:50 PM, Jens Axboe wrote:
>>>>>> On 11/12/18 10:41 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>>>> This is the revised patchset for the ubd driver. Known issues:
>>>>>>>
>>>>>>> 1. If an error is returned to an operation different from 
>>>>>>> READ/WRITE/SYNC
>>>>>>> the block subsystem goes south finishing with a crash. I have 
>>>>>>> tried everything
>>>>>>> I can think of and whatever I could lift out of the loop and nbd 
>>>>>>> driver. I do
>>>>>>> not think that the problem is in the ubd driver. I am lost here, 
>>>>>>> Jens can you
>>>>>>> please give me a hand in handling the case when ubd_handler() 
>>>>>>> gets an error in
>>>>>>> reply to a discard.
>>>>>> It would help if you showed a crash from doing that, and an 
>>>>>> example of
>>>>>> how you are doing that.
>>>>> Test case - 1G ext4 image on MS DOS filesystem.
>>>>>
>>>>> With the present patch, the logic around line 844 should be commented
>>>>> out so that DISCARD is always enabled. I can send a patch where 
>>>>> this is
>>>>> always enabled for testing purposes.
>>>>>
>>>>> Mount filesystem - mount /dev/ubdb /mnt
>>>>>
>>>>> Try fstrim /mnt
>>>>>
>>>>> Crash is immediate,  but different in each case.
>>>>>
>>>>> Some cases contain the WARN from line 439 in blk.h . Some contain
>>>>> spinlock recursion BUG(). Some do not contain anything - it just
>>>>> crashes. Or hangs.
>>>>>
>>>>> I tried various combinations (and code theft from loop and nbd) of
>>>>> blk_update_request, __blk_mq_end_request, blk_mq_end_request, etc 
>>>>> - the
>>>>> flavor of the crash changes a bit, but not the essence.
>>>> Sorry, all of this isn't very clear to me. It definitely sounds like
>>>> you are doing something wrong. Where are you calling falloc_punch()
>>>> from?
>>>   From the IO thread. It sets the error code and writes to the IPC pipe
>>> which triggers an interrupt. Same as with any other disk IO in UML/UBD.
>>>
>>>> It's not even safe from ->queue_rq(), you'd need to add
>>>> BLK_MQ_F_BLOCKING to your mq_ops flags to ensure that blk-mq
>>>> always calls you from a context that can block. If you're doing
>>>> it from the irq handler, well then...
>>> The only thing the IRQ handler does is to process the completion 
>>> request.
>>>
>>> That works OK for requests which have no error.
>> Note the warning above on blocking from ->queue()rq...
>
> This is already of help. The closest to UML UBD is NBD. The difference 
> is that there you talk to a remote server, while in UML you talk to an 
> IO thread which does all the IO.
>
> NBD already has MQ_F_BLOCKING enabled, I will have a look at it in 
> more detail.
>
>>
>>> It also works fine (no crashes) for requests which return an error to
>>> the FSYNC call. I did not do proper exhaustive testing with error
>>> injection, but based on cursory testing read/write error handling also
>>> seems to work (do not take my word for it, I will write a proper "error
>>> injection" harness tomorrow to retest that).
>>>
>>> The issue shows immediately when processing the completion of a DISCARD
>>> request which has an error set. DISCARD requests with no error set 
>>> are fine.
>> Please share some of the crashes, I can't help you with this without
>> having something concrete.
>
> root@stretch-opx:~# fstrim /mnt
> [  135.160000] do_io - discard failed err = 95 fd = 23
> [  135.160000] do_io - discard failed err = 95 fd = 23
> [  135.160000]  [<600850f0>] ? prepare_to_wait+0x0/0x80
> [  135.170000] ------------[ cut here ]------------
> [  135.170000] WARNING: CPU: 0 PID: 1151 at block/blk.h:439 
> generic_make_request_checks+0x3b5/0x680
> [  135.170000] Modules linked in:
> [  135.170000] CPU: 0 PID: 1151 Comm: fstrim Tainted: G W 
> 4.20.0-rc1-00005-g046844eea47e-dirty #121
> [  135.170000] Stack:
> [  135.170000]  d3821740 60744258 d3821788 00000000
> [  135.170000]  00000009 00000000 d3821750 607442ac
> [  135.170000]  d38217c0 6005034e d3821800 600e5902
> [  135.170000] Call Trace:
> [  135.170000]  [<603ffec5>] ? generic_make_request_checks+0x3b5/0x680
> [  135.170000]  [<6009271a>] ? printk+0x0/0x94
> [  135.170000]  [<6002a849>] show_stack+0x129/0x190
> [  135.170000]  [<60744258>] ? dump_stack_print_info+0xe8/0x100
> [  135.170000]  [<607442ac>] dump_stack+0x2a/0x2e
> [  135.170000]  [<6005034e>] __warn+0x13e/0x170
> [  135.170000]  [<600e5902>] ? mempool_alloc+0x92/0x1e0
> [  135.170000]  [<600504f6>] warn_slowpath_null+0x46/0x4b
> [  135.170000]  [<603ffec5>] generic_make_request_checks+0x3b5/0x680
> [  135.170000]  [<603f9783>] ? bio_alloc_bioset+0x193/0x320
> [  135.170000]  [<60044ef0>] ? set_signals+0x0/0x50
> [  135.170000]  [<60044ef0>] ? set_signals+0x0/0x50
> [  135.170000]  [<6040118f>] generic_make_request+0xdf/0x480
> [  135.170000]  [<603f92d6>] ? __bio_clone_fast+0xd6/0xe0
> [  135.170000]  [<603f9944>] ? bio_clone_fast+0x34/0x40
> [  135.170000]  [<6040961e>] blk_queue_split+0x59e/0x620
> [  135.170000]  [<60044ef0>] ? set_signals+0x0/0x50
> [  135.170000]  [<6040f84a>] blk_mq_make_request+0x7a/0x400
> [  135.170000]  [<600e5902>] ? mempool_alloc+0x92/0x1e0
> [  135.170000]  [<603fae90>] ? bio_endio+0x0/0x130
> [  135.170000]  [<604014cd>] generic_make_request+0x41d/0x480
> [  135.170000]  [<6040170f>] submit_bio+0x1df/0x1f0
> [  135.170000]  [<603f8348>] submit_bio_wait+0x68/0x90
> [  135.170000]  [<6040bd22>] blkdev_issue_discard+0x72/0xb0
> [  135.170000]  [<602174f0>] ext4_trim_all_free+0x350/0x680
> [  135.170000]  [<607ccc90>] ? _raw_spin_unlock+0x0/0x20
> [  135.170000]  [<6002ce80>] ? copy_chunk_from_user+0x0/0x30
> [  135.170000]  [<6021b5af>] ext4_trim_fs+0x1cf/0x270
> [  135.170000]  [<602109cf>] ext4_ioctl+0x1cbf/0x1f40
> [  135.170000]  [<60152e08>] ? vfs_getattr_nosec+0x68/0x90
> [  135.170000]  [<60153163>] ? cp_new_stat+0x163/0x180
> [  135.170000]  [<60165b18>] do_vfs_ioctl+0x798/0xa40
> [  135.170000]  [<60165e23>] ksys_ioctl+0x63/0xa0
> [  135.170000]  [<6078def0>] ? ptrace+0x0/0xa0
> [  135.170000]  [<60165e70>] sys_ioctl+0x10/0x20
> [  135.170000]  [<6002ce58>] handle_syscall+0x98/0xc0
> [  135.170000]  [<6075be10>] ? __waitpid+0x0/0xa0
> [  135.170000]  [<600482d6>] userspace+0x4c6/0x5b0
> [  135.170000]  [<600438af>] ? save_registers+0x1f/0x50
> [  135.170000]  [<6004b747>] ? arch_prctl+0x117/0x1c0
> [  135.170000]  [<6005c587>] ? calculate_sigpending+0x77/0x80
> [  135.170000]  [<60029204>] fork_handler+0x94/0xa0
> [  135.170000]
> [  135.170000] ---[ end trace 24ca562b91e6dbdf ]---
>
> And hang. Let me try a few more times to see if I can get the spinlock 
> recursion and/or abort instead of hang case (they are more difficult 
> to reproduce).

And the spinlock recursion (not particularly informative):

root@stretch-opx:~# mount /dev/ubdb /mnt
[   34.480000] EXT4-fs (ubdb): recovery complete
[   34.480000] EXT4-fs (ubdb): mounted filesystem with ordered data 
mode. Opts: (null)
fstrim /mnt/
[   42.410000] do_io - discard failed err = 95 fd = 23
[   42.410000] do_io - discard failed err = 95 fd = 23
[   42.410000] do_io - discard failed err = 95 fd = 23
[   42.420000] BUG: spinlock cpu recursion on CPU#0, fstrim/537
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] do_io - discard failed err = 95 fd = 23
[   42.420000] print_req_error: operation not supported error, dev ubdb, 
sector 33976
[   42.420000] print_req_error: operation not supported error, dev ubdb, 
sector 34040
[   42.420000] print_req_error: operation not supported error, dev ubdb, 
sector 34104
[   42.420000] print_req_error: operation not supported error, dev ubdb, 
sector 34168
[   42.420000] print_req_error: operation not supported error, dev ubdb, 
sector 34232
[   42.420000] print_req_error: operation not supported error, dev ubdb, 
sector 34296
[   42.420000] print_req_error: operation not supported error, dev ubdb, 
sector 34360
[   42.420000] print_req_error: operation not supported error, dev ubdb, 
sector 34424
[   42.420000] print_req_error: operation not supported error, dev ubdb, 
sector 34488
[   42.420000] print_req_error: operation not supported error, dev ubdb, 
sector 34552

>
>>
-- 
Anton R. Ivanov

Cambridge Greys Limited, England and Wales company No 10273661
http://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-12 18:23           ` Anton Ivanov
  2018-11-12 18:24             ` Anton Ivanov
@ 2018-11-12 18:28             ` Jens Axboe
  2018-11-12 18:41               ` Anton Ivanov
  1 sibling, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2018-11-12 18:28 UTC (permalink / raw)
  To: Anton Ivanov, linux-um; +Cc: richard, hch

On 11/12/18 11:23 AM, Anton Ivanov wrote:
> 
> On 11/12/18 6:14 PM, Jens Axboe wrote:
>> On 11/12/18 11:12 AM, Anton Ivanov wrote:
>>> On 11/12/18 6:04 PM, Jens Axboe wrote:
>>>> On 11/12/18 11:00 AM, Anton Ivanov wrote:
>>>>> On 11/12/18 5:50 PM, Jens Axboe wrote:
>>>>>> On 11/12/18 10:41 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>>>> This is the revised patchset for the ubd driver. Known issues:
>>>>>>>
>>>>>>> 1. If an error is returned to an operation different from READ/WRITE/SYNC
>>>>>>> the block subsystem goes south finishing with a crash. I have tried everything
>>>>>>> I can think of and whatever I could lift out of the loop and nbd driver. I do
>>>>>>> not think that the problem is in the ubd driver. I am lost here, Jens can you
>>>>>>> please give me a hand in handling the case when ubd_handler() gets an error in
>>>>>>> reply to a discard.
>>>>>> It would help if you showed a crash from doing that, and an example of
>>>>>> how you are doing that.
>>>>> Test case - 1G ext4 image on MS DOS filesystem.
>>>>>
>>>>> With the present patch, the logic around line 844 should be commented
>>>>> out so that DISCARD is always enabled. I can send a patch where this is
>>>>> always enabled for testing purposes.
>>>>>
>>>>> Mount filesystem - mount /dev/ubdb /mnt
>>>>>
>>>>> Try fstrim /mnt
>>>>>
>>>>> Crash is immediate,  but different in each case.
>>>>>
>>>>> Some cases contain the WARN from line 439 in blk.h . Some contain
>>>>> spinlock recursion BUG(). Some do not contain anything - it just
>>>>> crashes. Or hangs.
>>>>>
>>>>> I tried various combinations (and code theft from loop and nbd) of
>>>>> blk_update_request, __blk_mq_end_request, blk_mq_end_request, etc - the
>>>>> flavor of the crash changes a bit, but not the essence.
>>>> Sorry, all of this isn't very clear to me. It definitely sounds like
>>>> you are doing something wrong. Where are you calling falloc_punch()
>>>> from?
>>>   From the IO thread. It sets the error code and writes to the IPC pipe
>>> which triggers an interrupt. Same as with any other disk IO in UML/UBD.
>>>
>>>> It's not even safe from ->queue_rq(), you'd need to add
>>>> BLK_MQ_F_BLOCKING to your mq_ops flags to ensure that blk-mq
>>>> always calls you from a context that can block. If you're doing
>>>> it from the irq handler, well then...
>>> The only thing the IRQ handler does is to process the completion request.
>>>
>>> That works OK for requests which have no error.
>> Note the warning above on blocking from ->queue()rq...
> 
> This is already of help. The closest to UML UBD is NBD. The difference 
> is that there you talk to a remote server, while in UML you talk to an 
> IO thread which does all the IO.
> 
> NBD already has MQ_F_BLOCKING enabled, I will have a look at it in more 
> detail.
> 
>>
>>> It also works fine (no crashes) for requests which return an error to
>>> the FSYNC call. I did not do proper exhaustive testing with error
>>> injection, but based on cursory testing read/write error handling also
>>> seems to work (do not take my word for it, I will write a proper "error
>>> injection" harness tomorrow to retest that).
>>>
>>> The issue shows immediately when processing the completion of a DISCARD
>>> request which has an error set. DISCARD requests with no error set are fine.
>> Please share some of the crashes, I can't help you with this without
>> having something concrete.
> 
> root@stretch-opx:~# fstrim /mnt
> [  135.160000] do_io - discard failed err = 95 fd = 23
> [  135.160000] do_io - discard failed err = 95 fd = 23
> [  135.160000]  [<600850f0>] ? prepare_to_wait+0x0/0x80
> [  135.170000] ------------[ cut here ]------------
> [  135.170000] WARNING: CPU: 0 PID: 1151 at block/blk.h:439 
> generic_make_request_checks+0x3b5/0x680
> [  135.170000] Modules linked in:
> [  135.170000] CPU: 0 PID: 1151 Comm: fstrim Tainted: G W         
> 4.20.0-rc1-00005-g046844eea47e-dirty #121
> [  135.170000] Stack:
> [  135.170000]  d3821740 60744258 d3821788 00000000
> [  135.170000]  00000009 00000000 d3821750 607442ac
> [  135.170000]  d38217c0 6005034e d3821800 600e5902
> [  135.170000] Call Trace:
> [  135.170000]  [<603ffec5>] ? generic_make_request_checks+0x3b5/0x680
> [  135.170000]  [<6009271a>] ? printk+0x0/0x94
> [  135.170000]  [<6002a849>] show_stack+0x129/0x190
> [  135.170000]  [<60744258>] ? dump_stack_print_info+0xe8/0x100
> [  135.170000]  [<607442ac>] dump_stack+0x2a/0x2e
> [  135.170000]  [<6005034e>] __warn+0x13e/0x170
> [  135.170000]  [<600e5902>] ? mempool_alloc+0x92/0x1e0
> [  135.170000]  [<600504f6>] warn_slowpath_null+0x46/0x4b
> [  135.170000]  [<603ffec5>] generic_make_request_checks+0x3b5/0x680
> [  135.170000]  [<603f9783>] ? bio_alloc_bioset+0x193/0x320
> [  135.170000]  [<60044ef0>] ? set_signals+0x0/0x50
> [  135.170000]  [<60044ef0>] ? set_signals+0x0/0x50
> [  135.170000]  [<6040118f>] generic_make_request+0xdf/0x480
> [  135.170000]  [<603f92d6>] ? __bio_clone_fast+0xd6/0xe0
> [  135.170000]  [<603f9944>] ? bio_clone_fast+0x34/0x40
> [  135.170000]  [<6040961e>] blk_queue_split+0x59e/0x620
> [  135.170000]  [<60044ef0>] ? set_signals+0x0/0x50
> [  135.170000]  [<6040f84a>] blk_mq_make_request+0x7a/0x400
> [  135.170000]  [<600e5902>] ? mempool_alloc+0x92/0x1e0
> [  135.170000]  [<603fae90>] ? bio_endio+0x0/0x130
> [  135.170000]  [<604014cd>] generic_make_request+0x41d/0x480
> [  135.170000]  [<6040170f>] submit_bio+0x1df/0x1f0
> [  135.170000]  [<603f8348>] submit_bio_wait+0x68/0x90
> [  135.170000]  [<6040bd22>] blkdev_issue_discard+0x72/0xb0
> [  135.170000]  [<602174f0>] ext4_trim_all_free+0x350/0x680
> [  135.170000]  [<607ccc90>] ? _raw_spin_unlock+0x0/0x20
> [  135.170000]  [<6002ce80>] ? copy_chunk_from_user+0x0/0x30
> [  135.170000]  [<6021b5af>] ext4_trim_fs+0x1cf/0x270
> [  135.170000]  [<602109cf>] ext4_ioctl+0x1cbf/0x1f40
> [  135.170000]  [<60152e08>] ? vfs_getattr_nosec+0x68/0x90
> [  135.170000]  [<60153163>] ? cp_new_stat+0x163/0x180
> [  135.170000]  [<60165b18>] do_vfs_ioctl+0x798/0xa40
> [  135.170000]  [<60165e23>] ksys_ioctl+0x63/0xa0
> [  135.170000]  [<6078def0>] ? ptrace+0x0/0xa0
> [  135.170000]  [<60165e70>] sys_ioctl+0x10/0x20
> [  135.170000]  [<6002ce58>] handle_syscall+0x98/0xc0
> [  135.170000]  [<6075be10>] ? __waitpid+0x0/0xa0
> [  135.170000]  [<600482d6>] userspace+0x4c6/0x5b0
> [  135.170000]  [<600438af>] ? save_registers+0x1f/0x50
> [  135.170000]  [<6004b747>] ? arch_prctl+0x117/0x1c0
> [  135.170000]  [<6005c587>] ? calculate_sigpending+0x77/0x80
> [  135.170000]  [<60029204>] fork_handler+0x94/0xa0
> [  135.170000]
> [  135.170000] ---[ end trace 24ca562b91e6dbdf ]---
> 
> And hang. Let me try a few more times to see if I can get the spinlock 
> recursion and/or abort instead of hang case (they are more difficult to 
> reproduce).

This shows exactly what I was talking about, you are trying to
issue IO from irq/non-blocking context. If you look up the warning,
that's exactly what it is telling you.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-12 18:28             ` Jens Axboe
@ 2018-11-12 18:41               ` Anton Ivanov
  2018-11-12 19:11                 ` Jens Axboe
  0 siblings, 1 reply; 25+ messages in thread
From: Anton Ivanov @ 2018-11-12 18:41 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch


On 11/12/18 6:28 PM, Jens Axboe wrote:
> On 11/12/18 11:23 AM, Anton Ivanov wrote:
>> On 11/12/18 6:14 PM, Jens Axboe wrote:
>>> On 11/12/18 11:12 AM, Anton Ivanov wrote:
>>>> On 11/12/18 6:04 PM, Jens Axboe wrote:
>>>>> On 11/12/18 11:00 AM, Anton Ivanov wrote:
>>>>>> On 11/12/18 5:50 PM, Jens Axboe wrote:
>>>>>>> On 11/12/18 10:41 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>>>>> This is the revised patchset for the ubd driver. Known issues:
>>>>>>>>
>>>>>>>> 1. If an error is returned to an operation different from READ/WRITE/SYNC
>>>>>>>> the block subsystem goes south finishing with a crash. I have tried everything
>>>>>>>> I can think of and whatever I could lift out of the loop and nbd driver. I do
>>>>>>>> not think that the problem is in the ubd driver. I am lost here, Jens can you
>>>>>>>> please give me a hand in handling the case when ubd_handler() gets an error in
>>>>>>>> reply to a discard.
>>>>>>> It would help if you showed a crash from doing that, and an example of
>>>>>>> how you are doing that.
>>>>>> Test case - 1G ext4 image on MS DOS filesystem.
>>>>>>
>>>>>> With the present patch, the logic around line 844 should be commented
>>>>>> out so that DISCARD is always enabled. I can send a patch where this is
>>>>>> always enabled for testing purposes.
>>>>>>
>>>>>> Mount filesystem - mount /dev/ubdb /mnt
>>>>>>
>>>>>> Try fstrim /mnt
>>>>>>
>>>>>> Crash is immediate,  but different in each case.
>>>>>>
>>>>>> Some cases contain the WARN from line 439 in blk.h . Some contain
>>>>>> spinlock recursion BUG(). Some do not contain anything - it just
>>>>>> crashes. Or hangs.
>>>>>>
>>>>>> I tried various combinations (and code theft from loop and nbd) of
>>>>>> blk_update_request, __blk_mq_end_request, blk_mq_end_request, etc - the
>>>>>> flavor of the crash changes a bit, but not the essence.
>>>>> Sorry, all of this isn't very clear to me. It definitely sounds like
>>>>> you are doing something wrong. Where are you calling falloc_punch()
>>>>> from?
>>>>    From the IO thread. It sets the error code and writes to the IPC pipe
>>>> which triggers an interrupt. Same as with any other disk IO in UML/UBD.
>>>>
>>>>> It's not even safe from ->queue_rq(), you'd need to add
>>>>> BLK_MQ_F_BLOCKING to your mq_ops flags to ensure that blk-mq
>>>>> always calls you from a context that can block. If you're doing
>>>>> it from the irq handler, well then...
>>>> The only thing the IRQ handler does is to process the completion request.
>>>>
>>>> That works OK for requests which have no error.
>>> Note the warning above on blocking from ->queue()rq...
>> This is already of help. The closest to UML UBD is NBD. The difference
>> is that there you talk to a remote server, while in UML you talk to an
>> IO thread which does all the IO.
>>
>> NBD already has MQ_F_BLOCKING enabled, I will have a look at it in more
>> detail.
>>
>>>> It also works fine (no crashes) for requests which return an error to
>>>> the FSYNC call. I did not do proper exhaustive testing with error
>>>> injection, but based on cursory testing read/write error handling also
>>>> seems to work (do not take my word for it, I will write a proper "error
>>>> injection" harness tomorrow to retest that).
>>>>
>>>> The issue shows immediately when processing the completion of a DISCARD
>>>> request which has an error set. DISCARD requests with no error set are fine.
>>> Please share some of the crashes, I can't help you with this without
>>> having something concrete.
>> root@stretch-opx:~# fstrim /mnt
>> [  135.160000] do_io - discard failed err = 95 fd = 23
>> [  135.160000] do_io - discard failed err = 95 fd = 23
>> [  135.160000]  [<600850f0>] ? prepare_to_wait+0x0/0x80
>> [  135.170000] ------------[ cut here ]------------
>> [  135.170000] WARNING: CPU: 0 PID: 1151 at block/blk.h:439
>> generic_make_request_checks+0x3b5/0x680
>> [  135.170000] Modules linked in:
>> [  135.170000] CPU: 0 PID: 1151 Comm: fstrim Tainted: G W
>> 4.20.0-rc1-00005-g046844eea47e-dirty #121
>> [  135.170000] Stack:
>> [  135.170000]  d3821740 60744258 d3821788 00000000
>> [  135.170000]  00000009 00000000 d3821750 607442ac
>> [  135.170000]  d38217c0 6005034e d3821800 600e5902
>> [  135.170000] Call Trace:
>> [  135.170000]  [<603ffec5>] ? generic_make_request_checks+0x3b5/0x680
>> [  135.170000]  [<6009271a>] ? printk+0x0/0x94
>> [  135.170000]  [<6002a849>] show_stack+0x129/0x190
>> [  135.170000]  [<60744258>] ? dump_stack_print_info+0xe8/0x100
>> [  135.170000]  [<607442ac>] dump_stack+0x2a/0x2e
>> [  135.170000]  [<6005034e>] __warn+0x13e/0x170
>> [  135.170000]  [<600e5902>] ? mempool_alloc+0x92/0x1e0
>> [  135.170000]  [<600504f6>] warn_slowpath_null+0x46/0x4b
>> [  135.170000]  [<603ffec5>] generic_make_request_checks+0x3b5/0x680
>> [  135.170000]  [<603f9783>] ? bio_alloc_bioset+0x193/0x320
>> [  135.170000]  [<60044ef0>] ? set_signals+0x0/0x50
>> [  135.170000]  [<60044ef0>] ? set_signals+0x0/0x50
>> [  135.170000]  [<6040118f>] generic_make_request+0xdf/0x480
>> [  135.170000]  [<603f92d6>] ? __bio_clone_fast+0xd6/0xe0
>> [  135.170000]  [<603f9944>] ? bio_clone_fast+0x34/0x40
>> [  135.170000]  [<6040961e>] blk_queue_split+0x59e/0x620
>> [  135.170000]  [<60044ef0>] ? set_signals+0x0/0x50
>> [  135.170000]  [<6040f84a>] blk_mq_make_request+0x7a/0x400
>> [  135.170000]  [<600e5902>] ? mempool_alloc+0x92/0x1e0
>> [  135.170000]  [<603fae90>] ? bio_endio+0x0/0x130
>> [  135.170000]  [<604014cd>] generic_make_request+0x41d/0x480
>> [  135.170000]  [<6040170f>] submit_bio+0x1df/0x1f0
>> [  135.170000]  [<603f8348>] submit_bio_wait+0x68/0x90
>> [  135.170000]  [<6040bd22>] blkdev_issue_discard+0x72/0xb0
>> [  135.170000]  [<602174f0>] ext4_trim_all_free+0x350/0x680
>> [  135.170000]  [<607ccc90>] ? _raw_spin_unlock+0x0/0x20
>> [  135.170000]  [<6002ce80>] ? copy_chunk_from_user+0x0/0x30
>> [  135.170000]  [<6021b5af>] ext4_trim_fs+0x1cf/0x270
>> [  135.170000]  [<602109cf>] ext4_ioctl+0x1cbf/0x1f40
>> [  135.170000]  [<60152e08>] ? vfs_getattr_nosec+0x68/0x90
>> [  135.170000]  [<60153163>] ? cp_new_stat+0x163/0x180
>> [  135.170000]  [<60165b18>] do_vfs_ioctl+0x798/0xa40
>> [  135.170000]  [<60165e23>] ksys_ioctl+0x63/0xa0
>> [  135.170000]  [<6078def0>] ? ptrace+0x0/0xa0
>> [  135.170000]  [<60165e70>] sys_ioctl+0x10/0x20
>> [  135.170000]  [<6002ce58>] handle_syscall+0x98/0xc0
>> [  135.170000]  [<6075be10>] ? __waitpid+0x0/0xa0
>> [  135.170000]  [<600482d6>] userspace+0x4c6/0x5b0
>> [  135.170000]  [<600438af>] ? save_registers+0x1f/0x50
>> [  135.170000]  [<6004b747>] ? arch_prctl+0x117/0x1c0
>> [  135.170000]  [<6005c587>] ? calculate_sigpending+0x77/0x80
>> [  135.170000]  [<60029204>] fork_handler+0x94/0xa0
>> [  135.170000]
>> [  135.170000] ---[ end trace 24ca562b91e6dbdf ]---
>>
>> And hang. Let me try a few more times to see if I can get the spinlock
>> recursion and/or abort instead of hang case (they are more difficult to
>> reproduce).
> This shows exactly what I was talking about, you are trying to
> issue IO from irq/non-blocking context. If you look up the warning,
> that's exactly what it is telling you.


So why does this happen only in the case of an error? Why nothing is 
triggered in the OK path?

After adding this flag I no longer get the  block/blk.h:439 The spinlock 
recursion and the other barfs are alive and well.

Back to "what is that" - can I execute blk_update_request, 
__blk_mq_end_request, blk_mq_end_request out of an interrupt context?

It seems that I can when error is BLK_STS_OK, but not when it is not.

>
-- 
Anton R. Ivanov

Cambridge Greys Limited, England and Wales company No 10273661
http://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-12 18:41               ` Anton Ivanov
@ 2018-11-12 19:11                 ` Jens Axboe
  2018-11-12 19:26                   ` Jens Axboe
  0 siblings, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2018-11-12 19:11 UTC (permalink / raw)
  To: Anton Ivanov, linux-um; +Cc: richard, hch

On 11/12/18 11:41 AM, Anton Ivanov wrote:
>>> And hang. Let me try a few more times to see if I can get the spinlock
>>> recursion and/or abort instead of hang case (they are more difficult to
>>> reproduce).
>> This shows exactly what I was talking about, you are trying to
>> issue IO from irq/non-blocking context. If you look up the warning,
>> that's exactly what it is telling you.
> 
> 
> So why does this happen only in the case of an error? Why nothing is 
> triggered in the OK path?
> 
> After adding this flag I no longer get the  block/blk.h:439 The spinlock 
> recursion and the other barfs are alive and well.
> 
> Back to "what is that" - can I execute blk_update_request, 
> __blk_mq_end_request, blk_mq_end_request out of an interrupt context?
> 
> It seems that I can when error is BLK_STS_OK, but not when it is not.

Post a new stack trace with BLOCKING set. Of course you can call the end
request functions from irq context, most drivers end up doing that. For
OK and error.

There's something fundamentally wrong with your case, it might be reuse
or something like that.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-12 19:11                 ` Jens Axboe
@ 2018-11-12 19:26                   ` Jens Axboe
  2018-11-13  8:42                     ` Anton Ivanov
  0 siblings, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2018-11-12 19:26 UTC (permalink / raw)
  To: Anton Ivanov, linux-um; +Cc: richard, hch

On 11/12/18 12:11 PM, Jens Axboe wrote:
> On 11/12/18 11:41 AM, Anton Ivanov wrote:
>>>> And hang. Let me try a few more times to see if I can get the spinlock
>>>> recursion and/or abort instead of hang case (they are more difficult to
>>>> reproduce).
>>> This shows exactly what I was talking about, you are trying to
>>> issue IO from irq/non-blocking context. If you look up the warning,
>>> that's exactly what it is telling you.
>>
>>
>> So why does this happen only in the case of an error? Why nothing is 
>> triggered in the OK path?
>>
>> After adding this flag I no longer get the  block/blk.h:439 The spinlock 
>> recursion and the other barfs are alive and well.
>>
>> Back to "what is that" - can I execute blk_update_request, 
>> __blk_mq_end_request, blk_mq_end_request out of an interrupt context?
>>
>> It seems that I can when error is BLK_STS_OK, but not when it is not.
> 
> Post a new stack trace with BLOCKING set. Of course you can call the end
> request functions from irq context, most drivers end up doing that. For
> OK and error.
> 
> There's something fundamentally wrong with your case, it might be reuse
> or something like that.

Took a quick look at your code, and a quick guess would be that your
discard completions are bad. For discard, initialize io_req->length
with -bio->bi_size >> 9. I think this is preventing your discard
completion from ended properly.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-12 19:26                   ` Jens Axboe
@ 2018-11-13  8:42                     ` Anton Ivanov
  2018-11-13 10:15                       ` Anton Ivanov
  0 siblings, 1 reply; 25+ messages in thread
From: Anton Ivanov @ 2018-11-13  8:42 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch


On 11/12/18 7:26 PM, Jens Axboe wrote:
> On 11/12/18 12:11 PM, Jens Axboe wrote:
>> On 11/12/18 11:41 AM, Anton Ivanov wrote:
>>>>> And hang. Let me try a few more times to see if I can get the spinlock
>>>>> recursion and/or abort instead of hang case (they are more difficult to
>>>>> reproduce).
>>>> This shows exactly what I was talking about, you are trying to
>>>> issue IO from irq/non-blocking context. If you look up the warning,
>>>> that's exactly what it is telling you.
>>>
>>> So why does this happen only in the case of an error? Why nothing is
>>> triggered in the OK path?
>>>
>>> After adding this flag I no longer get the  block/blk.h:439 The spinlock
>>> recursion and the other barfs are alive and well.
>>>
>>> Back to "what is that" - can I execute blk_update_request,
>>> __blk_mq_end_request, blk_mq_end_request out of an interrupt context?
>>>
>>> It seems that I can when error is BLK_STS_OK, but not when it is not.
>> Post a new stack trace with BLOCKING set. Of course you can call the end
>> request functions from irq context, most drivers end up doing that. For
>> OK and error.
>>
>> There's something fundamentally wrong with your case, it might be reuse
>> or something like that.
> Took a quick look at your code, and a quick guess would be that your
> discard completions are bad. For discard, initialize io_req->length
> with -bio->bi_size >> 9. I think this is preventing your discard
> completion from ended properly.

1. If that is the issue both loop and nbd have it - they use 
blk_rq_bytes(req) for their internal req structure length field same as 
I do and should suffer from the same fate if discard encounters an 
error. The code in ubd is now pretty much identical to what they do.

loop (more readable): 
https://elixir.bootlin.com/linux/latest/source/drivers/block/loop.c#L419

nbd: https://elixir.bootlin.com/linux/latest/source/drivers/block/nbd.c#L460

2. This is internal length. It is unused if I invoke blk_mq_end_request 
which picks up the sizing out of the req. This is what loop does when it 
encounters an error and I tried that too without success. The ->length 
field gets used only if the request is fed into blk_update_request.

3. Unless I am missing something, bio->bi_size does not exist in 4.19 
and 4.20 - 
https://elixir.bootlin.com/linux/v4.20-rc1/source/include/linux/blk_types.h#L144

4. The major remaining difference between UBD as it is now and NBD/Loop 
is that these have complete functions and anything different from 
READ/WRITE is fed straight to complete.

https://elixir.bootlin.com/linux/latest/source/drivers/block/loop.c#L1787

It does not go down the blk_mq_end_request path.  I am going to try 
sorting that out in first instance and see if that helps.

-- 

Anton R. Ivanov

Cambridge Greys Limited, England and Wales company No 10273661
http://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-13  8:42                     ` Anton Ivanov
@ 2018-11-13 10:15                       ` Anton Ivanov
  2018-11-13 10:20                         ` Anton Ivanov
  2018-11-13 13:30                         ` Jens Axboe
  0 siblings, 2 replies; 25+ messages in thread
From: Anton Ivanov @ 2018-11-13 10:15 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch


On 11/13/18 8:42 AM, Anton Ivanov wrote:
>
> On 11/12/18 7:26 PM, Jens Axboe wrote:
>> On 11/12/18 12:11 PM, Jens Axboe wrote:
>>> On 11/12/18 11:41 AM, Anton Ivanov wrote:
>>>>>> And hang. Let me try a few more times to see if I can get the 
>>>>>> spinlock
>>>>>> recursion and/or abort instead of hang case (they are more 
>>>>>> difficult to
>>>>>> reproduce).
>>>>> This shows exactly what I was talking about, you are trying to
>>>>> issue IO from irq/non-blocking context. If you look up the warning,
>>>>> that's exactly what it is telling you.
>>>>
>>>> So why does this happen only in the case of an error? Why nothing is
>>>> triggered in the OK path?
>>>>
>>>> After adding this flag I no longer get the  block/blk.h:439 The 
>>>> spinlock
>>>> recursion and the other barfs are alive and well.
>>>>
>>>> Back to "what is that" - can I execute blk_update_request,
>>>> __blk_mq_end_request, blk_mq_end_request out of an interrupt context?
>>>>
>>>> It seems that I can when error is BLK_STS_OK, but not when it is not.
>>> Post a new stack trace with BLOCKING set. Of course you can call the 
>>> end
>>> request functions from irq context, most drivers end up doing that. For
>>> OK and error.
>>>
>>> There's something fundamentally wrong with your case, it might be reuse
>>> or something like that.
>> Took a quick look at your code, and a quick guess would be that your
>> discard completions are bad. For discard, initialize io_req->length
>> with -bio->bi_size >> 9. I think this is preventing your discard
>> completion from ended properly.
>
> 1. If that is the issue both loop and nbd have it - they use 
> blk_rq_bytes(req) for their internal req structure length field same 
> as I do and should suffer from the same fate if discard encounters an 
> error. The code in ubd is now pretty much identical to what they do.
>
> loop (more readable): 
> https://elixir.bootlin.com/linux/latest/source/drivers/block/loop.c#L419
>
> nbd: 
> https://elixir.bootlin.com/linux/latest/source/drivers/block/nbd.c#L460
>
> 2. This is internal length. It is unused if I invoke 
> blk_mq_end_request which picks up the sizing out of the req. This is 
> what loop does when it encounters an error and I tried that too 
> without success. The ->length field gets used only if the request is 
> fed into blk_update_request.
>
> 3. Unless I am missing something, bio->bi_size does not exist in 4.19 
> and 4.20 - 
> https://elixir.bootlin.com/linux/v4.20-rc1/source/include/linux/blk_types.h#L144
>
> 4. The major remaining difference between UBD as it is now and 
> NBD/Loop is that these have complete functions and anything different 
> from READ/WRITE is fed straight to complete.
>
> https://elixir.bootlin.com/linux/latest/source/drivers/block/loop.c#L1787
>
> It does not go down the blk_mq_end_request path.  I am going to try 
> sorting that out in first instance and see if that helps.
>
I have figured out part of it. It is not caused by the block layer at 
all. The offending parts are printks in the io thread. You simply cannot 
printk out of there, that causes the spinlock recursion, irqs, on/off, 
etc warning.

This means that any errors MUST be reported only after they have gone 
back to the main UML kernel thread.

-- 
Anton R. Ivanov

Cambridge Greys Limited, England and Wales company No 10273661
http://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-13 10:15                       ` Anton Ivanov
@ 2018-11-13 10:20                         ` Anton Ivanov
  2018-11-13 13:30                         ` Jens Axboe
  1 sibling, 0 replies; 25+ messages in thread
From: Anton Ivanov @ 2018-11-13 10:20 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch


On 11/13/18 10:15 AM, Anton Ivanov wrote:
>
> On 11/13/18 8:42 AM, Anton Ivanov wrote:
>>
>> On 11/12/18 7:26 PM, Jens Axboe wrote:
>>> On 11/12/18 12:11 PM, Jens Axboe wrote:
>>>> On 11/12/18 11:41 AM, Anton Ivanov wrote:
>>>>>>> And hang. Let me try a few more times to see if I can get the 
>>>>>>> spinlock
>>>>>>> recursion and/or abort instead of hang case (they are more 
>>>>>>> difficult to
>>>>>>> reproduce).
>>>>>> This shows exactly what I was talking about, you are trying to
>>>>>> issue IO from irq/non-blocking context. If you look up the warning,
>>>>>> that's exactly what it is telling you.
>>>>>
>>>>> So why does this happen only in the case of an error? Why nothing is
>>>>> triggered in the OK path?
>>>>>
>>>>> After adding this flag I no longer get the block/blk.h:439 The 
>>>>> spinlock
>>>>> recursion and the other barfs are alive and well.
>>>>>
>>>>> Back to "what is that" - can I execute blk_update_request,
>>>>> __blk_mq_end_request, blk_mq_end_request out of an interrupt context?
>>>>>
>>>>> It seems that I can when error is BLK_STS_OK, but not when it is not.
>>>> Post a new stack trace with BLOCKING set. Of course you can call 
>>>> the end
>>>> request functions from irq context, most drivers end up doing that. 
>>>> For
>>>> OK and error.
>>>>
>>>> There's something fundamentally wrong with your case, it might be 
>>>> reuse
>>>> or something like that.
>>> Took a quick look at your code, and a quick guess would be that your
>>> discard completions are bad. For discard, initialize io_req->length
>>> with -bio->bi_size >> 9. I think this is preventing your discard
>>> completion from ended properly.
>>
>> 1. If that is the issue both loop and nbd have it - they use 
>> blk_rq_bytes(req) for their internal req structure length field same 
>> as I do and should suffer from the same fate if discard encounters an 
>> error. The code in ubd is now pretty much identical to what they do.
>>
>> loop (more readable): 
>> https://elixir.bootlin.com/linux/latest/source/drivers/block/loop.c#L419
>>
>> nbd: 
>> https://elixir.bootlin.com/linux/latest/source/drivers/block/nbd.c#L460
>>
>> 2. This is internal length. It is unused if I invoke 
>> blk_mq_end_request which picks up the sizing out of the req. This is 
>> what loop does when it encounters an error and I tried that too 
>> without success. The ->length field gets used only if the request is 
>> fed into blk_update_request.
>>
>> 3. Unless I am missing something, bio->bi_size does not exist in 4.19 
>> and 4.20 - 
>> https://elixir.bootlin.com/linux/v4.20-rc1/source/include/linux/blk_types.h#L144
>>
>> 4. The major remaining difference between UBD as it is now and 
>> NBD/Loop is that these have complete functions and anything different 
>> from READ/WRITE is fed straight to complete.
>>
>> https://elixir.bootlin.com/linux/latest/source/drivers/block/loop.c#L1787 
>>
>>
>> It does not go down the blk_mq_end_request path.  I am going to try 
>> sorting that out in first instance and see if that helps.
>>
> I have figured out part of it. It is not caused by the block layer at 
> all. The offending parts are printks in the io thread. You simply 
> cannot printk out of there, that causes the spinlock recursion, irqs, 
> on/off, etc warning.
>
> This means that any errors MUST be reported only after they have gone 
> back to the main UML kernel thread.

And it works fine now which means I will bake a new series and send them 
out in the afternoon.

Many thanks to Jens and everyone else who helped debug it.

-- 
Anton R. Ivanov

Cambridge Greys Limited, England and Wales company No 10273661
http://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-13 10:15                       ` Anton Ivanov
  2018-11-13 10:20                         ` Anton Ivanov
@ 2018-11-13 13:30                         ` Jens Axboe
  2018-11-13 13:56                           ` Anton Ivanov
  1 sibling, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2018-11-13 13:30 UTC (permalink / raw)
  To: Anton Ivanov, linux-um; +Cc: richard, hch

On 11/13/18 3:15 AM, Anton Ivanov wrote:
> I have figured out part of it. It is not caused by the block layer at 
> all. The offending parts are printks in the io thread. You simply cannot 
> printk out of there, that causes the spinlock recursion, irqs, on/off, 
> etc warning.
> 
> This means that any errors MUST be reported only after they have gone 
> back to the main UML kernel thread.

That's a pretty serious issue with UML, there's absolutely nothing wrong
with doing printk from IRQ context, or from an IO thread like this one.
You don't know what the call stack below you will do for functions you
call, this is a ticking time bomb.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-13 13:30                         ` Jens Axboe
@ 2018-11-13 13:56                           ` Anton Ivanov
  2018-11-13 15:14                             ` Jens Axboe
  0 siblings, 1 reply; 25+ messages in thread
From: Anton Ivanov @ 2018-11-13 13:56 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch

On 13/11/2018 13:30, Jens Axboe wrote:
> On 11/13/18 3:15 AM, Anton Ivanov wrote:
>> I have figured out part of it. It is not caused by the block layer at
>> all. The offending parts are printks in the io thread. You simply cannot
>> printk out of there, that causes the spinlock recursion, irqs, on/off,
>> etc warning.
>>
>> This means that any errors MUST be reported only after they have gone
>> back to the main UML kernel thread.
> That's a pretty serious issue with UML, there's absolutely nothing wrong
> with doing printk from IRQ context, or from an IO thread like this one.
> You don't know what the call stack below you will do for functions you
> call, this is a ticking time bomb.

This is not a normal kernel IO thread. It is a host-side helper thread. 
There are two of those - one for UBD and and one for sigio.

I am not qualified to judge what is normal and not there, but they 
operate "outside" normal kernel authority and communicate only via their 
IPC pipe.

It is not interrupt context - it is an equivalent of "inside the actual 
device".

They are doing specific "hardware emulation". In fact, while their IPC 
presently assumes shared address space with the kernel, that does not 
need to be the case.

So there is little or no reason for them to have access to any normal 
kernel facilities after their initial startup. The benefits of printks 
there are minimal (if any). If there is an error in the IPC pipe it goes 
both way so the kernel side will see it. If there is an error in the 
actual operation, it is conveyed via the IPC so the kernel sees it as well.

So I am not sure about the ticking bomb. Unpleasant limitation for 
debugging - yes. Ticking bomb... Not sure.

-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-13 13:56                           ` Anton Ivanov
@ 2018-11-13 15:14                             ` Jens Axboe
  2018-11-13 15:24                               ` Anton Ivanov
  0 siblings, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2018-11-13 15:14 UTC (permalink / raw)
  To: Anton Ivanov, linux-um; +Cc: richard, hch

On 11/13/18 6:56 AM, Anton Ivanov wrote:
> On 13/11/2018 13:30, Jens Axboe wrote:
>> On 11/13/18 3:15 AM, Anton Ivanov wrote:
>>> I have figured out part of it. It is not caused by the block layer at
>>> all. The offending parts are printks in the io thread. You simply cannot
>>> printk out of there, that causes the spinlock recursion, irqs, on/off,
>>> etc warning.
>>>
>>> This means that any errors MUST be reported only after they have gone
>>> back to the main UML kernel thread.
>> That's a pretty serious issue with UML, there's absolutely nothing wrong
>> with doing printk from IRQ context, or from an IO thread like this one.
>> You don't know what the call stack below you will do for functions you
>> call, this is a ticking time bomb.
> 
> This is not a normal kernel IO thread. It is a host-side helper thread. 
> There are two of those - one for UBD and and one for sigio.
> 
> I am not qualified to judge what is normal and not there, but they 
> operate "outside" normal kernel authority and communicate only via their 
> IPC pipe.
> 
> It is not interrupt context - it is an equivalent of "inside the actual 
> device".

But it's running in the kernel, which means it's eligible to call
any functions. If you have ANY function in your call path that
isn't 100% controller by ubd or UML, then all bets are off.

I don't even see how your new patch 4 fixes anything, whether the
printk happens in the io_thread() or some call off of that should
be the same issue.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-13 15:14                             ` Jens Axboe
@ 2018-11-13 15:24                               ` Anton Ivanov
  2018-11-13 15:29                                 ` Jens Axboe
  0 siblings, 1 reply; 25+ messages in thread
From: Anton Ivanov @ 2018-11-13 15:24 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch


On 11/13/18 3:14 PM, Jens Axboe wrote:
> On 11/13/18 6:56 AM, Anton Ivanov wrote:
>> On 13/11/2018 13:30, Jens Axboe wrote:
>>> On 11/13/18 3:15 AM, Anton Ivanov wrote:
>>>> I have figured out part of it. It is not caused by the block layer at
>>>> all. The offending parts are printks in the io thread. You simply cannot
>>>> printk out of there, that causes the spinlock recursion, irqs, on/off,
>>>> etc warning.
>>>>
>>>> This means that any errors MUST be reported only after they have gone
>>>> back to the main UML kernel thread.
>>> That's a pretty serious issue with UML, there's absolutely nothing wrong
>>> with doing printk from IRQ context, or from an IO thread like this one.
>>> You don't know what the call stack below you will do for functions you
>>> call, this is a ticking time bomb.
>> This is not a normal kernel IO thread. It is a host-side helper thread.
>> There are two of those - one for UBD and and one for sigio.
>>
>> I am not qualified to judge what is normal and not there, but they
>> operate "outside" normal kernel authority and communicate only via their
>> IPC pipe.
>>
>> It is not interrupt context - it is an equivalent of "inside the actual
>> device".
> But it's running in the kernel, which means it's eligible to call
> any functions. If you have ANY function in your call path that
> isn't 100% controller by ubd or UML, then all bets are off.

Actually - no it is not. It is spawned once early at initialization and 
it is not doing anything besides disk IO into/from buffers allocated by 
the kernel proper. In fact it does not execute _ANY_ kernel calls at 
all. None.

Every single thing it executes is on the other side of the hypervisor 
boundary and on the host side.

>
> I don't even see how your new patch 4 fixes anything, whether the
> printk happens in the io_thread() or some call off of that should
> be the same issue.

An equivalent would be that let's say "QEMU is allowed at any time, 
regardless of context, interrupt state, lock state and anythying else to 
invoke a kernel routine". I do not think anyone would agree with that 
statement.

It is obviously invalid. Ditto for VMware. Ditto for VirtualBox.

So why does a portion of the UML hypervisor which exists outside the 
kernel get an entitlement to execute kernel functions while none of 
these do?

It should not have it to start off with - it is part of the host-side in 
the hypervisor.

IMHO what is missing in the patch is a comment which ads a BIG FAT 
warning that do_io is not allowed to invoke any kernel resources. It is 
not in or any part of the kernel. It just happens to share address space 
with it to simulate "DMA".

I am happy to add it. In fact, it may be worth it to isolate the source 
completely and move to a different file to avoid any confusion that it 
is a part of the kernel in any way.

-- 
Anton R. Ivanov

Cambridge Greys Limited, England and Wales company No 10273661
http://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: New Patch series for the UBD Driver
  2018-11-13 15:24                               ` Anton Ivanov
@ 2018-11-13 15:29                                 ` Jens Axboe
  0 siblings, 0 replies; 25+ messages in thread
From: Jens Axboe @ 2018-11-13 15:29 UTC (permalink / raw)
  To: Anton Ivanov, linux-um; +Cc: richard, hch

On 11/13/18 8:24 AM, Anton Ivanov wrote:
> 
> On 11/13/18 3:14 PM, Jens Axboe wrote:
>> On 11/13/18 6:56 AM, Anton Ivanov wrote:
>>> On 13/11/2018 13:30, Jens Axboe wrote:
>>>> On 11/13/18 3:15 AM, Anton Ivanov wrote:
>>>>> I have figured out part of it. It is not caused by the block layer at
>>>>> all. The offending parts are printks in the io thread. You simply cannot
>>>>> printk out of there, that causes the spinlock recursion, irqs, on/off,
>>>>> etc warning.
>>>>>
>>>>> This means that any errors MUST be reported only after they have gone
>>>>> back to the main UML kernel thread.
>>>> That's a pretty serious issue with UML, there's absolutely nothing wrong
>>>> with doing printk from IRQ context, or from an IO thread like this one.
>>>> You don't know what the call stack below you will do for functions you
>>>> call, this is a ticking time bomb.
>>> This is not a normal kernel IO thread. It is a host-side helper thread.
>>> There are two of those - one for UBD and and one for sigio.
>>>
>>> I am not qualified to judge what is normal and not there, but they
>>> operate "outside" normal kernel authority and communicate only via their
>>> IPC pipe.
>>>
>>> It is not interrupt context - it is an equivalent of "inside the actual
>>> device".
>> But it's running in the kernel, which means it's eligible to call
>> any functions. If you have ANY function in your call path that
>> isn't 100% controller by ubd or UML, then all bets are off.
> 
> Actually - no it is not. It is spawned once early at initialization and 
> it is not doing anything besides disk IO into/from buffers allocated by 
> the kernel proper. In fact it does not execute _ANY_ kernel calls at 
> all. None.
> 
> Every single thing it executes is on the other side of the hypervisor 
> boundary and on the host side.

OK, I guess it's fine then and not something to worry about. Probably
could do with a comment, then!

> IMHO what is missing in the patch is a comment which ads a BIG FAT 
> warning that do_io is not allowed to invoke any kernel resources. It is 
> not in or any part of the kernel. It just happens to share address space 
> with it to simulate "DMA".
> 
> I am happy to add it. In fact, it may be worth it to isolate the source 
> completely and move to a different file to avoid any confusion that it 
> is a part of the kernel in any way.

Yes exactly, I think that would help a lot.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2018-11-13 15:29 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-12 17:41 New Patch series for the UBD Driver anton.ivanov
2018-11-12 17:41 ` [PATCH 1/3] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
2018-11-12 17:48   ` Jens Axboe
2018-11-12 17:52     ` Anton Ivanov
2018-11-12 17:42 ` [PATCH 2/3] um: Clean-up command processing in " anton.ivanov
2018-11-12 17:42 ` [PATCH 3/3] um: Add support for DISCARD in the UBD Driver anton.ivanov
2018-11-12 17:50 ` New Patch series for " Jens Axboe
2018-11-12 18:00   ` Anton Ivanov
2018-11-12 18:04     ` Jens Axboe
2018-11-12 18:12       ` Anton Ivanov
2018-11-12 18:14         ` Jens Axboe
2018-11-12 18:23           ` Anton Ivanov
2018-11-12 18:24             ` Anton Ivanov
2018-11-12 18:28             ` Jens Axboe
2018-11-12 18:41               ` Anton Ivanov
2018-11-12 19:11                 ` Jens Axboe
2018-11-12 19:26                   ` Jens Axboe
2018-11-13  8:42                     ` Anton Ivanov
2018-11-13 10:15                       ` Anton Ivanov
2018-11-13 10:20                         ` Anton Ivanov
2018-11-13 13:30                         ` Jens Axboe
2018-11-13 13:56                           ` Anton Ivanov
2018-11-13 15:14                             ` Jens Axboe
2018-11-13 15:24                               ` Anton Ivanov
2018-11-13 15:29                                 ` Jens Axboe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.