All of lore.kernel.org
 help / color / mirror / Atom feed
* Revised and fixed patchset
@ 2018-11-13 11:59 anton.ivanov
  2018-11-13 11:59 ` [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
                   ` (3 more replies)
  0 siblings, 4 replies; 25+ messages in thread
From: anton.ivanov @ 2018-11-13 11:59 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch

The culprit was located - it was the printk complain in the IO thread.
Testing that further revealed that printk-ing out of it is unsafe.

This series includes a patch to remove all of these just in case.


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver
  2018-11-13 11:59 Revised and fixed patchset anton.ivanov
@ 2018-11-13 11:59 ` anton.ivanov
  2018-11-13 13:33   ` Jens Axboe
  2018-11-14 15:32   ` Christoph Hellwig
  2018-11-13 11:59 ` [PATCH 2/4] um: Clean-up command processing in " anton.ivanov
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 25+ messages in thread
From: anton.ivanov @ 2018-11-13 11:59 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Switch to block mq-constants for both commands and error codes.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/ubd_kern.c | 51 ++++++++++++++++++++++++++++------------------
 1 file changed, 31 insertions(+), 20 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 28c40624bcb6..331837f1f632 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -43,11 +43,12 @@
 #include <os.h>
 #include "cow.h"
 
-enum ubd_req { UBD_READ, UBD_WRITE, UBD_FLUSH };
+#define UBD_SECTOR_SIZE (1 << 9)
+/* Max request size is determined by sector mask - 32K */
+#define UBD_MAX_REQUEST (8 * sizeof(long))
 
 struct io_thread_req {
 	struct request *req;
-	enum ubd_req op;
 	int fds[2];
 	unsigned long offsets[2];
 	unsigned long long offset;
@@ -511,15 +512,13 @@ static void ubd_handler(void)
 		}
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 			struct io_thread_req *io_req = (*irq_req_buffer)[count];
-			int err = io_req->error ? BLK_STS_IOERR : BLK_STS_OK;
 
-			if (!blk_update_request(io_req->req, err, io_req->length))
-				__blk_mq_end_request(io_req->req, err);
+			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
+				__blk_mq_end_request(io_req->req, io_req->error);
 
 			kfree(io_req);
 		}
 	}
-
 	reactivate_fd(thread_fd, UBD_IRQ);
 }
 
@@ -830,6 +829,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 		if(err < 0) goto error;
 		ubd_dev->cow.fd = err;
 	}
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 	return 0;
  error:
 	os_close_file(ubd_dev->fd);
@@ -882,7 +882,7 @@ static int ubd_disk_register(int major, u64 size, int unit,
 	return 0;
 }
 
-#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9))
+#define ROUND_BLOCK(n) ((n + (UBD_SECTOR_SIZE - 1)) & (-UBD_SECTOR_SIZE))
 
 static const struct blk_mq_ops ubd_mq_ops = {
 	.queue_rq = ubd_queue_rq,
@@ -1277,7 +1277,7 @@ static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
 	if(req->length > (sizeof(req->sector_mask) * 8) << 9)
 		panic("Operation too long");
 
-	if(req->op == UBD_READ) {
+	if (req_op(req->req) == REQ_OP_READ) {
 		for(i = 0; i < req->length >> 9; i++){
 			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
 				ubd_set_bit(i, (unsigned char *)
@@ -1307,15 +1307,12 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
 		io_req->fds[0] = dev->fd;
 	io_req->error = 0;
 
-	if (req_op(req) == REQ_OP_FLUSH) {
-		io_req->op = UBD_FLUSH;
-	} else {
+	if (req_op(req) != REQ_OP_FLUSH) {
 		io_req->fds[1] = dev->fd;
 		io_req->cow_offset = -1;
 		io_req->offset = off;
 		io_req->length = bvec->bv_len;
 		io_req->sector_mask = 0;
-		io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE;
 		io_req->offsets[0] = 0;
 		io_req->offsets[1] = dev->cow.data_offset;
 		io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
@@ -1413,22 +1410,36 @@ static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
 	return -EINVAL;
 }
 
+static int map_error(int error_code)
+{
+	switch (error_code) {
+	case 0:
+		return BLK_STS_OK;
+	case ENOSYS:
+	case EOPNOTSUPP:
+		return BLK_STS_NOTSUPP;
+	case ENOSPC:
+		return BLK_STS_NOSPC;
+	}
+	return BLK_STS_IOERR;
+}
+
 static int update_bitmap(struct io_thread_req *req)
 {
 	int n;
 
 	if(req->cow_offset == -1)
-		return 0;
+		return map_error(0);
 
 	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
 			  sizeof(req->bitmap_words), req->cow_offset);
 	if(n != sizeof(req->bitmap_words)){
 		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
 		       req->fds[1]);
-		return 1;
+		return map_error(-n);
 	}
 
-	return 0;
+	return map_error(0);
 }
 
 static void do_io(struct io_thread_req *req)
@@ -1438,13 +1449,13 @@ static void do_io(struct io_thread_req *req)
 	int n, nsectors, start, end, bit;
 	__u64 off;
 
-	if (req->op == UBD_FLUSH) {
+	if (req_op(req->req) == REQ_OP_FLUSH) {
 		/* fds[0] is always either the rw image or our cow file */
 		n = os_sync_file(req->fds[0]);
 		if (n != 0) {
 			printk("do_io - sync failed err = %d "
 			       "fd = %d\n", -n, req->fds[0]);
-			req->error = 1;
+			req->error = map_error(-n);
 		}
 		return;
 	}
@@ -1464,7 +1475,7 @@ static void do_io(struct io_thread_req *req)
 		len = (end - start) * req->sectorsize;
 		buf = &req->buffer[start * req->sectorsize];
 
-		if(req->op == UBD_READ){
+		if (req_op(req->req) == REQ_OP_READ) {
 			n = 0;
 			do {
 				buf = &buf[n];
@@ -1473,7 +1484,7 @@ static void do_io(struct io_thread_req *req)
 				if (n < 0) {
 					printk("do_io - read failed, err = %d "
 					       "fd = %d\n", -n, req->fds[bit]);
-					req->error = 1;
+					req->error = map_error(-n);
 					return;
 				}
 			} while((n < len) && (n != 0));
@@ -1483,7 +1494,7 @@ static void do_io(struct io_thread_req *req)
 			if(n != len){
 				printk("do_io - write failed err = %d "
 				       "fd = %d\n", -n, req->fds[bit]);
-				req->error = 1;
+				req->error = map_error(-n);
 				return;
 			}
 		}
-- 
2.11.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 2/4] um: Clean-up command processing in UML UBD driver
  2018-11-13 11:59 Revised and fixed patchset anton.ivanov
  2018-11-13 11:59 ` [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
@ 2018-11-13 11:59 ` anton.ivanov
  2018-11-13 13:34   ` Jens Axboe
  2018-11-13 11:59 ` [PATCH 3/4] um: Remove unsafe printks from the io thread anton.ivanov
  2018-11-13 11:59 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
  3 siblings, 1 reply; 25+ messages in thread
From: anton.ivanov @ 2018-11-13 11:59 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Clean-up command processing and return BLK_STS_NOTSUP for
uknown commands.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/ubd_kern.c | 64 ++++++++++++++++++++++++++++------------------
 1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 331837f1f632..0f02373ef632 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1307,21 +1307,25 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
 		io_req->fds[0] = dev->fd;
 	io_req->error = 0;
 
-	if (req_op(req) != REQ_OP_FLUSH) {
-		io_req->fds[1] = dev->fd;
-		io_req->cow_offset = -1;
-		io_req->offset = off;
+	if (bvec != NULL) {
 		io_req->length = bvec->bv_len;
-		io_req->sector_mask = 0;
-		io_req->offsets[0] = 0;
-		io_req->offsets[1] = dev->cow.data_offset;
 		io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
-		io_req->sectorsize = 1 << 9;
+	} else {
+		io_req->buffer = NULL;
+		io_req->length = blk_rq_bytes(req);
+	}
 
-		if (dev->cow.file) {
-			cowify_req(io_req, dev->cow.bitmap,
-				   dev->cow.bitmap_offset, dev->cow.bitmap_len);
-		}
+	io_req->sectorsize = UBD_SECTOR_SIZE;
+	io_req->fds[1] = dev->fd;
+	io_req->cow_offset = -1;
+	io_req->offset = off;
+	io_req->sector_mask = 0;
+	io_req->offsets[0] = 0;
+	io_req->offsets[1] = dev->cow.data_offset;
+
+	if (dev->cow.file) {
+		cowify_req(io_req, dev->cow.bitmap,
+			   dev->cow.bitmap_offset, dev->cow.bitmap_len);
 	}
 
 	ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
@@ -1330,7 +1334,6 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
 			pr_err("write to io thread failed: %d\n", -ret);
 		kfree(io_req);
 	}
-
 	return ret;
 }
 
@@ -1344,20 +1347,31 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_mq_start_request(req);
 
 	spin_lock_irq(&ubd_dev->lock);
-
-	if (req_op(req) == REQ_OP_FLUSH) {
+	switch (req_op(req)) {
+	/* operations with no lentgth/offset arguments */
+	case REQ_OP_FLUSH:
 		ret = ubd_queue_one_vec(hctx, req, 0, NULL);
-	} else {
-		struct req_iterator iter;
-		struct bio_vec bvec;
-		u64 off = (u64)blk_rq_pos(req) << 9;
-
-		rq_for_each_segment(bvec, req, iter) {
-			ret = ubd_queue_one_vec(hctx, req, off, &bvec);
-			if (ret < 0)
-				goto out;
-			off += bvec.bv_len;
+		break;
+	/* operations with bio_vec arguments */
+	case REQ_OP_READ:
+	case REQ_OP_WRITE:
+		{
+			struct req_iterator iter;
+			struct bio_vec bvec;
+			u64 off = (u64)blk_rq_pos(req) << 9;
+
+			rq_for_each_segment(bvec, req, iter) {
+				ret = ubd_queue_one_vec(hctx, req, off, &bvec);
+				if (ret < 0)
+					goto out;
+				off += bvec.bv_len;
+			}
 		}
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		spin_unlock_irq(&ubd_dev->lock);
+		return BLK_STS_NOTSUPP;
 	}
 out:
 	spin_unlock_irq(&ubd_dev->lock);
-- 
2.11.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 3/4] um: Remove unsafe printks from the io thread
  2018-11-13 11:59 Revised and fixed patchset anton.ivanov
  2018-11-13 11:59 ` [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
  2018-11-13 11:59 ` [PATCH 2/4] um: Clean-up command processing in " anton.ivanov
@ 2018-11-13 11:59 ` anton.ivanov
  2018-11-13 13:31   ` Jens Axboe
  2018-11-13 11:59 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
  3 siblings, 1 reply; 25+ messages in thread
From: anton.ivanov @ 2018-11-13 11:59 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Printk out of the io thread has been proven to be unsafe. It is
safer to pass the error to the block IO layer nad let it print
it if need be.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/ubd_kern.c | 28 +++-------------------------
 1 file changed, 3 insertions(+), 25 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 0f02373ef632..e78f27f5ce88 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1447,11 +1447,8 @@ static int update_bitmap(struct io_thread_req *req)
 
 	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
 			  sizeof(req->bitmap_words), req->cow_offset);
-	if(n != sizeof(req->bitmap_words)){
-		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
-		       req->fds[1]);
+	if(n != sizeof(req->bitmap_words))
 		return map_error(-n);
-	}
 
 	return map_error(0);
 }
@@ -1465,12 +1462,7 @@ static void do_io(struct io_thread_req *req)
 
 	if (req_op(req->req) == REQ_OP_FLUSH) {
 		/* fds[0] is always either the rw image or our cow file */
-		n = os_sync_file(req->fds[0]);
-		if (n != 0) {
-			printk("do_io - sync failed err = %d "
-			       "fd = %d\n", -n, req->fds[0]);
-			req->error = map_error(-n);
-		}
+		req->error = map_error(-os_sync_file(req->fds[0]));
 		return;
 	}
 
@@ -1495,9 +1487,7 @@ static void do_io(struct io_thread_req *req)
 				buf = &buf[n];
 				len -= n;
 				n = os_pread_file(req->fds[bit], buf, len, off);
-				if (n < 0) {
-					printk("do_io - read failed, err = %d "
-					       "fd = %d\n", -n, req->fds[bit]);
+				if(n < 0){
 					req->error = map_error(-n);
 					return;
 				}
@@ -1506,8 +1496,6 @@ static void do_io(struct io_thread_req *req)
 		} else {
 			n = os_pwrite_file(req->fds[bit], buf, len, off);
 			if(n != len){
-				printk("do_io - write failed err = %d "
-				       "fd = %d\n", -n, req->fds[bit]);
 				req->error = map_error(-n);
 				return;
 			}
@@ -1545,11 +1533,6 @@ int io_thread(void *arg)
 			if (n == -EAGAIN) {
 				ubd_read_poll(-1);
 				continue;
-			} else {
-				printk("io_thread - read failed, fd = %d, "
-				       "err = %d,"
-				       "reminder = %d\n",
-				       kernel_fd, -n, io_remainder_size);
 			}
 		}
 
@@ -1564,11 +1547,6 @@ int io_thread(void *arg)
 			res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
 			if (res >= 0) {
 				written += res;
-			} else {
-				if (res != -EAGAIN) {
-					printk("io_thread - write failed, fd = %d, "
-					       "err = %d\n", kernel_fd, -n);
-				}
 			}
 			if (written < n) {
 				ubd_write_poll(-1);
-- 
2.11.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 11:59 Revised and fixed patchset anton.ivanov
                   ` (2 preceding siblings ...)
  2018-11-13 11:59 ` [PATCH 3/4] um: Remove unsafe printks from the io thread anton.ivanov
@ 2018-11-13 11:59 ` anton.ivanov
  2018-11-13 13:40   ` Jens Axboe
  2018-11-14 15:30   ` Christoph Hellwig
  3 siblings, 2 replies; 25+ messages in thread
From: anton.ivanov @ 2018-11-13 11:59 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Support for DISCARD and WRITE_ZEROES in the ubd driver using
fallocate. DISCARD capable devices are autodetected at open
time.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/Kconfig     | 21 +++++++++++++++
 arch/um/drivers/ubd_kern.c  | 66 ++++++++++++++++++++++++++++++++++++++-------
 arch/um/include/shared/os.h |  1 +
 arch/um/os-Linux/file.c     | 10 +++++++
 4 files changed, 89 insertions(+), 9 deletions(-)

diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
index 2b1aaf7755aa..e817fd4a4c30 100644
--- a/arch/um/drivers/Kconfig
+++ b/arch/um/drivers/Kconfig
@@ -1,4 +1,25 @@
 # SPDX-License-Identifier: GPL-2.0
+menu "UML Block Devices"
+
+config BLK_DEV_UBD
+	bool "UBD Block Device"
+	default y
+	help
+          User Mode Linux virtual block device driver
+
+config BLK_DEV_UBD_SYNC
+	bool "Use Synchronous mode for UBD"
+	default n
+	help
+          Perform all disk operations synchronously (extremely slow).
+
+config BLK_DEV_UBD_DISCARD
+	bool "Enable DISCARD/TRIM support in UBD"
+	default y
+	help
+          Enable discard/trim support. Requires host kernel 3.x or above and
+	  may not be supported on all host filesystems.
+endmenu
 
 menu "UML Character Devices"
 
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index e78f27f5ce88..4fd4cb68f033 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -513,9 +513,13 @@ static void ubd_handler(void)
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 			struct io_thread_req *io_req = (*irq_req_buffer)[count];
 
-			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
-				__blk_mq_end_request(io_req->req, io_req->error);
-
+			if ((io_req->error) || (io_req->buffer == NULL)) {
+				blk_mq_end_request(io_req->req, io_req->error);
+			} else {
+				if (!blk_update_request(io_req->req, io_req->error, io_req->length)) {
+					__blk_mq_end_request(io_req->req, io_req->error);
+				}
+			}
 			kfree(io_req);
 		}
 	}
@@ -775,6 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 	char **back_ptr;
 	int err, create_cow, *create_ptr;
 	int fd;
+	int data;
 
 	ubd_dev->openflags = ubd_dev->boot_openflags;
 	create_cow = 0;
@@ -829,6 +834,23 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 		if(err < 0) goto error;
 		ubd_dev->cow.fd = err;
 	}
+#ifdef CONFIG_BLK_DEV_UBD_DISCARD
+	if (ubd_dev->cow.file != NULL)
+		fd = ubd_dev->cow.fd;
+	else
+		fd = ubd_dev->fd;
+	err = os_pread_file(fd, &data, sizeof(int), 0);
+	if (err == sizeof(int)) {
+		err = os_falloc_punch(fd, 0, sizeof(int));
+		if (err == 0) {
+			ubd_dev->queue->limits.discard_granularity = UBD_SECTOR_SIZE;
+			ubd_dev->queue->limits.discard_alignment = UBD_SECTOR_SIZE;
+			blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
+			blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
+			os_pwrite_file(fd, &data, sizeof(int), 0);
+		}
+	}
+#endif
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 	return 0;
  error:
@@ -1368,6 +1390,12 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 			}
 		}
 		break;
+#ifdef CONFIG_BLK_DEV_UBD_DISCARD
+	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
+		ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
+		break;
+#endif
 	default:
 		WARN_ON_ONCE(1);
 		spin_unlock_irq(&ubd_dev->lock);
@@ -1447,7 +1475,7 @@ static int update_bitmap(struct io_thread_req *req)
 
 	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
 			  sizeof(req->bitmap_words), req->cow_offset);
-	if(n != sizeof(req->bitmap_words))
+	if (n != sizeof(req->bitmap_words))
 		return map_error(-n);
 
 	return map_error(0);
@@ -1455,11 +1483,13 @@ static int update_bitmap(struct io_thread_req *req)
 
 static void do_io(struct io_thread_req *req)
 {
-	char *buf;
+	char *buf = NULL;
 	unsigned long len;
 	int n, nsectors, start, end, bit;
 	__u64 off;
 
+	/* FLUSH is really a special case, we cannot "case" it with others */
+
 	if (req_op(req->req) == REQ_OP_FLUSH) {
 		/* fds[0] is always either the rw image or our cow file */
 		req->error = map_error(-os_sync_file(req->fds[0]));
@@ -1479,26 +1509,44 @@ static void do_io(struct io_thread_req *req)
 		off = req->offset + req->offsets[bit] +
 			start * req->sectorsize;
 		len = (end - start) * req->sectorsize;
-		buf = &req->buffer[start * req->sectorsize];
+		if (req->buffer != NULL)
+			buf = &req->buffer[start * req->sectorsize];
 
-		if (req_op(req->req) == REQ_OP_READ) {
+		switch (req_op(req->req)) {
+		case REQ_OP_READ:
 			n = 0;
 			do {
 				buf = &buf[n];
 				len -= n;
 				n = os_pread_file(req->fds[bit], buf, len, off);
-				if(n < 0){
+				if (n < 0) {
 					req->error = map_error(-n);
 					return;
 				}
 			} while((n < len) && (n != 0));
 			if (n < len) memset(&buf[n], 0, len - n);
-		} else {
+			break;
+		case REQ_OP_WRITE:
 			n = os_pwrite_file(req->fds[bit], buf, len, off);
 			if(n != len){
 				req->error = map_error(-n);
 				return;
 			}
+			break;
+#ifdef CONFIG_BLK_DEV_UBD_DISCARD
+		case REQ_OP_DISCARD:
+		case REQ_OP_WRITE_ZEROES:
+			n = os_falloc_punch(req->fds[bit], off, len);
+			if (n) {
+				req->error = map_error(-n);
+				return;
+			}
+			break;
+#endif
+		default:
+			WARN_ON_ONCE(1);
+			req->error = BLK_STS_NOTSUPP;
+			return;
 		}
 
 		start = end;
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 048ae37eb5aa..ebf23012a59b 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -175,6 +175,7 @@ extern int os_fchange_dir(int fd);
 extern unsigned os_major(unsigned long long dev);
 extern unsigned os_minor(unsigned long long dev);
 extern unsigned long long os_makedev(unsigned major, unsigned minor);
+extern int os_falloc_punch(int fd, unsigned long long offset, int count);
 
 /* start_up.c */
 extern void os_early_checks(void);
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index c0197097c86e..f25b110d4e70 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -610,3 +610,13 @@ unsigned long long os_makedev(unsigned major, unsigned minor)
 {
 	return makedev(major, minor);
 }
+
+int os_falloc_punch(int fd, unsigned long long offset, int len)
+{
+	int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
+
-- 
2.11.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/4] um: Remove unsafe printks from the io thread
  2018-11-13 11:59 ` [PATCH 3/4] um: Remove unsafe printks from the io thread anton.ivanov
@ 2018-11-13 13:31   ` Jens Axboe
  0 siblings, 0 replies; 25+ messages in thread
From: Jens Axboe @ 2018-11-13 13:31 UTC (permalink / raw)
  To: anton.ivanov, linux-um; +Cc: richard, hch

On 11/13/18 4:59 AM, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> 
> Printk out of the io thread has been proven to be unsafe. It is
> safer to pass the error to the block IO layer nad let it print
> it if need be.

As per previous message, I don't think this is a good idea at all.
It'd be much better to fix the real issue, not work around it.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver
  2018-11-13 11:59 ` [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
@ 2018-11-13 13:33   ` Jens Axboe
  2018-11-14 15:32   ` Christoph Hellwig
  1 sibling, 0 replies; 25+ messages in thread
From: Jens Axboe @ 2018-11-13 13:33 UTC (permalink / raw)
  To: anton.ivanov, linux-um; +Cc: richard, hch

On 11/13/18 4:59 AM, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> 
> Switch to block mq-constants for both commands and error codes.

This one looks good to me.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/4] um: Clean-up command processing in UML UBD driver
  2018-11-13 11:59 ` [PATCH 2/4] um: Clean-up command processing in " anton.ivanov
@ 2018-11-13 13:34   ` Jens Axboe
  2018-11-14 15:28     ` Christoph Hellwig
  0 siblings, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2018-11-13 13:34 UTC (permalink / raw)
  To: anton.ivanov, linux-um; +Cc: richard, hch

On 11/13/18 4:59 AM, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> 
> Clean-up command processing and return BLK_STS_NOTSUP for
> uknown commands.
> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>  arch/um/drivers/ubd_kern.c | 64 ++++++++++++++++++++++++++++------------------
>  1 file changed, 39 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
> index 331837f1f632..0f02373ef632 100644
> --- a/arch/um/drivers/ubd_kern.c
> +++ b/arch/um/drivers/ubd_kern.c
> @@ -1307,21 +1307,25 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
>  		io_req->fds[0] = dev->fd;
>  	io_req->error = 0;
>  
> -	if (req_op(req) != REQ_OP_FLUSH) {
> -		io_req->fds[1] = dev->fd;
> -		io_req->cow_offset = -1;
> -		io_req->offset = off;
> +	if (bvec != NULL) {
>  		io_req->length = bvec->bv_len;
> -		io_req->sector_mask = 0;
> -		io_req->offsets[0] = 0;
> -		io_req->offsets[1] = dev->cow.data_offset;
>  		io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
> -		io_req->sectorsize = 1 << 9;
> +	} else {
> +		io_req->buffer = NULL;
> +		io_req->length = blk_rq_bytes(req);
> +	}
>  
> -		if (dev->cow.file) {
> -			cowify_req(io_req, dev->cow.bitmap,
> -				   dev->cow.bitmap_offset, dev->cow.bitmap_len);
> -		}
> +	io_req->sectorsize = UBD_SECTOR_SIZE;
> +	io_req->fds[1] = dev->fd;
> +	io_req->cow_offset = -1;
> +	io_req->offset = off;
> +	io_req->sector_mask = 0;
> +	io_req->offsets[0] = 0;
> +	io_req->offsets[1] = dev->cow.data_offset;
> +
> +	if (dev->cow.file) {
> +		cowify_req(io_req, dev->cow.bitmap,
> +			   dev->cow.bitmap_offset, dev->cow.bitmap_len);
>  	}
>  
>  	ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
> @@ -1330,7 +1334,6 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
>  			pr_err("write to io thread failed: %d\n", -ret);
>  		kfree(io_req);
>  	}
> -
>  	return ret;
>  }
>  
> @@ -1344,20 +1347,31 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	blk_mq_start_request(req);
>  
>  	spin_lock_irq(&ubd_dev->lock);
> -
> -	if (req_op(req) == REQ_OP_FLUSH) {
> +	switch (req_op(req)) {
> +	/* operations with no lentgth/offset arguments */
> +	case REQ_OP_FLUSH:
>  		ret = ubd_queue_one_vec(hctx, req, 0, NULL);
> -	} else {
> -		struct req_iterator iter;
> -		struct bio_vec bvec;
> -		u64 off = (u64)blk_rq_pos(req) << 9;
> -
> -		rq_for_each_segment(bvec, req, iter) {
> -			ret = ubd_queue_one_vec(hctx, req, off, &bvec);
> -			if (ret < 0)
> -				goto out;
> -			off += bvec.bv_len;
> +		break;
> +	/* operations with bio_vec arguments */
> +	case REQ_OP_READ:
> +	case REQ_OP_WRITE:
> +		{
> +			struct req_iterator iter;
> +			struct bio_vec bvec;
> +			u64 off = (u64)blk_rq_pos(req) << 9;
> +
> +			rq_for_each_segment(bvec, req, iter) {
> +				ret = ubd_queue_one_vec(hctx, req, off, &bvec);
> +				if (ret < 0)
> +					goto out;
> +				off += bvec.bv_len;
> +			}
>  		}
> +		break;

This indentation is wrong/awkward. The usual style is:

	case REQ_OP_READ:
	case REQ_OP_WRITE: {
		struct req_iterator iter;
		struct bio_vec bvec;
		u64 off = (u64)blk_rq_pos(req) << 9;

		rq_for_each_segment(bvec, req, iter) {
			ret = ubd_queue_one_vec(hctx, req, off, &bvec);
			if (ret < 0)
				goto out;
			off += bvec.bv_len;
		}
		break;
		}

Apart from that, looks good to me.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 11:59 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
@ 2018-11-13 13:40   ` Jens Axboe
  2018-11-13 14:04     ` Anton Ivanov
  2018-11-14 15:30   ` Christoph Hellwig
  1 sibling, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2018-11-13 13:40 UTC (permalink / raw)
  To: anton.ivanov, linux-um; +Cc: richard, hch

On 11/13/18 4:59 AM, anton.ivanov@cambridgegreys.com wrote:
> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
> index 2b1aaf7755aa..e817fd4a4c30 100644
> --- a/arch/um/drivers/Kconfig
> +++ b/arch/um/drivers/Kconfig
> @@ -1,4 +1,25 @@
>  # SPDX-License-Identifier: GPL-2.0
> +menu "UML Block Devices"
> +
> +config BLK_DEV_UBD
> +	bool "UBD Block Device"
> +	default y
> +	help
> +          User Mode Linux virtual block device driver
> +
> +config BLK_DEV_UBD_SYNC
> +	bool "Use Synchronous mode for UBD"
> +	default n
> +	help
> +          Perform all disk operations synchronously (extremely slow).
> +
> +config BLK_DEV_UBD_DISCARD
> +	bool "Enable DISCARD/TRIM support in UBD"
> +	default y
> +	help
> +          Enable discard/trim support. Requires host kernel 3.x or above and
> +	  may not be supported on all host filesystems.
> +endmenu

kconfig entries are horrible ways to manage this. A module parameter
would be better. The latter also gets rid of the various ifdefs.

> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
> index e78f27f5ce88..4fd4cb68f033 100644
> --- a/arch/um/drivers/ubd_kern.c
> +++ b/arch/um/drivers/ubd_kern.c
> @@ -513,9 +513,13 @@ static void ubd_handler(void)
>  		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>  			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>  
> -			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
> -				__blk_mq_end_request(io_req->req, io_req->error);
> -
> +			if ((io_req->error) || (io_req->buffer == NULL)) {
> +				blk_mq_end_request(io_req->req, io_req->error);
> +			} else {
> +				if (!blk_update_request(io_req->req, io_req->error, io_req->length)) {
> +					__blk_mq_end_request(io_req->req, io_req->error);
> +				}
> +			}
>  			kfree(io_req);

This looks odd, why is this change necessary? Also a lot of unneeded
parentheses.

>  		}
>  	}
> @@ -775,6 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>  	char **back_ptr;
>  	int err, create_cow, *create_ptr;
>  	int fd;
> +	int data;
>  
>  	ubd_dev->openflags = ubd_dev->boot_openflags;
>  	create_cow = 0;
> @@ -829,6 +834,23 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>  		if(err < 0) goto error;
>  		ubd_dev->cow.fd = err;
>  	}
> +#ifdef CONFIG_BLK_DEV_UBD_DISCARD
> +	if (ubd_dev->cow.file != NULL)
> +		fd = ubd_dev->cow.fd;
> +	else
> +		fd = ubd_dev->fd;
> +	err = os_pread_file(fd, &data, sizeof(int), 0);
> +	if (err == sizeof(int)) {
> +		err = os_falloc_punch(fd, 0, sizeof(int));
> +		if (err == 0) {

It's not clear to me how it's safe to punch a 4-byte hole in the file as
a test?


-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 13:40   ` Jens Axboe
@ 2018-11-13 14:04     ` Anton Ivanov
  2018-11-13 15:18       ` Jens Axboe
  0 siblings, 1 reply; 25+ messages in thread
From: Anton Ivanov @ 2018-11-13 14:04 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch

On 13/11/2018 13:40, Jens Axboe wrote:
> On 11/13/18 4:59 AM, anton.ivanov@cambridgegreys.com wrote:
>> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
>> index 2b1aaf7755aa..e817fd4a4c30 100644
>> --- a/arch/um/drivers/Kconfig
>> +++ b/arch/um/drivers/Kconfig
>> @@ -1,4 +1,25 @@
>>   # SPDX-License-Identifier: GPL-2.0
>> +menu "UML Block Devices"
>> +
>> +config BLK_DEV_UBD
>> +	bool "UBD Block Device"
>> +	default y
>> +	help
>> +          User Mode Linux virtual block device driver
>> +
>> +config BLK_DEV_UBD_SYNC
>> +	bool "Use Synchronous mode for UBD"
>> +	default n
>> +	help
>> +          Perform all disk operations synchronously (extremely slow).
>> +
>> +config BLK_DEV_UBD_DISCARD
>> +	bool "Enable DISCARD/TRIM support in UBD"
>> +	default y
>> +	help
>> +          Enable discard/trim support. Requires host kernel 3.x or above and
>> +	  may not be supported on all host filesystems.
>> +endmenu
> kconfig entries are horrible ways to manage this. A module parameter
> would be better. The latter also gets rid of the various ifdefs.

I completely agree with you. Unfortunately, UBD for a number of reasons 
cannot be a module at present. It contains "host" side and loading 
something from the "inside" which breaks the hypervisor boundary is a 
definitive no go.

In order to implement the suggestion we need the concept of loadable 
host side modules and while that is possible the likelihood of someone 
doing all the work on this one at present is rather low.

>
>> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
>> index e78f27f5ce88..4fd4cb68f033 100644
>> --- a/arch/um/drivers/ubd_kern.c
>> +++ b/arch/um/drivers/ubd_kern.c
>> @@ -513,9 +513,13 @@ static void ubd_handler(void)
>>   		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>>   			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>>   
>> -			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
>> -				__blk_mq_end_request(io_req->req, io_req->error);
>> -
>> +			if ((io_req->error) || (io_req->buffer == NULL)) {
>> +				blk_mq_end_request(io_req->req, io_req->error);
>> +			} else {
>> +				if (!blk_update_request(io_req->req, io_req->error, io_req->length)) {
>> +					__blk_mq_end_request(io_req->req, io_req->error);
>> +				}
>> +			}
>>   			kfree(io_req);
> This looks odd, why is this change necessary? Also a lot of unneeded
> parentheses.

Loop and NBD do it that way. I can retest with the original code now 
that the underlying culprit is known and resubmit if that works.

>
>>   		}
>>   	}
>> @@ -775,6 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>   	char **back_ptr;
>>   	int err, create_cow, *create_ptr;
>>   	int fd;
>> +	int data;
>>   
>>   	ubd_dev->openflags = ubd_dev->boot_openflags;
>>   	create_cow = 0;
>> @@ -829,6 +834,23 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>   		if(err < 0) goto error;
>>   		ubd_dev->cow.fd = err;
>>   	}
>> +#ifdef CONFIG_BLK_DEV_UBD_DISCARD
>> +	if (ubd_dev->cow.file != NULL)
>> +		fd = ubd_dev->cow.fd;
>> +	else
>> +		fd = ubd_dev->fd;
>> +	err = os_pread_file(fd, &data, sizeof(int), 0);
>> +	if (err == sizeof(int)) {
>> +		err = os_falloc_punch(fd, 0, sizeof(int));
>> +		if (err == 0) {
> It's not clear to me how it's safe to punch a 4-byte hole in the file as
> a test?

A sizeof(int) sized hole is always implemented by simply writing zeroes. 
There is no fs manipulation here.

The test reads the contents of the file, punches a hoile and writes the 
content back if the hole was punched successfully so the file is in its 
orginal state.

Life would have been much easier if fallocate did not return -EINVAL on 
a 0 sized request. Unfortunately it does so any test has no choice but 
to punch a hole for real and fix the damage after that.

The location is completely safe for most UMLs UBDs. The most common 
method of building images is to format a device directly with ext2/3/4 - 
there is no partition table. The first sector in ext2/3/4 if memory 
serves me right is unused. Other fs - less so. Partition table - totally 
safe as the boot code portion is unused.

In any case, the code restores the data to its pre-test state.

-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 14:04     ` Anton Ivanov
@ 2018-11-13 15:18       ` Jens Axboe
  2018-11-13 15:32         ` Anton Ivanov
  0 siblings, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2018-11-13 15:18 UTC (permalink / raw)
  To: Anton Ivanov, linux-um; +Cc: richard, hch

On 11/13/18 7:04 AM, Anton Ivanov wrote:
> On 13/11/2018 13:40, Jens Axboe wrote:
>> On 11/13/18 4:59 AM, anton.ivanov@cambridgegreys.com wrote:
>>> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
>>> index 2b1aaf7755aa..e817fd4a4c30 100644
>>> --- a/arch/um/drivers/Kconfig
>>> +++ b/arch/um/drivers/Kconfig
>>> @@ -1,4 +1,25 @@
>>>   # SPDX-License-Identifier: GPL-2.0
>>> +menu "UML Block Devices"
>>> +
>>> +config BLK_DEV_UBD
>>> +	bool "UBD Block Device"
>>> +	default y
>>> +	help
>>> +          User Mode Linux virtual block device driver
>>> +
>>> +config BLK_DEV_UBD_SYNC
>>> +	bool "Use Synchronous mode for UBD"
>>> +	default n
>>> +	help
>>> +          Perform all disk operations synchronously (extremely slow).
>>> +
>>> +config BLK_DEV_UBD_DISCARD
>>> +	bool "Enable DISCARD/TRIM support in UBD"
>>> +	default y
>>> +	help
>>> +          Enable discard/trim support. Requires host kernel 3.x or above and
>>> +	  may not be supported on all host filesystems.
>>> +endmenu
>> kconfig entries are horrible ways to manage this. A module parameter
>> would be better. The latter also gets rid of the various ifdefs.
> 
> I completely agree with you. Unfortunately, UBD for a number of reasons 
> cannot be a module at present. It contains "host" side and loading 
> something from the "inside" which breaks the hypervisor boundary is a 
> definitive no go.
> 
> In order to implement the suggestion we need the concept of loadable 
> host side modules and while that is possible the likelihood of someone 
> doing all the work on this one at present is rather low.

Module parameters work for builtin as well, then you just add
ubd.discard=true to the kernel command line.

>>> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
>>> index e78f27f5ce88..4fd4cb68f033 100644
>>> --- a/arch/um/drivers/ubd_kern.c
>>> +++ b/arch/um/drivers/ubd_kern.c
>>> @@ -513,9 +513,13 @@ static void ubd_handler(void)
>>>   		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>>>   			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>>>   
>>> -			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
>>> -				__blk_mq_end_request(io_req->req, io_req->error);
>>> -
>>> +			if ((io_req->error) || (io_req->buffer == NULL)) {
>>> +				blk_mq_end_request(io_req->req, io_req->error);
>>> +			} else {
>>> +				if (!blk_update_request(io_req->req, io_req->error, io_req->length)) {
>>> +					__blk_mq_end_request(io_req->req, io_req->error);
>>> +				}
>>> +			}
>>>   			kfree(io_req);
>> This looks odd, why is this change necessary? Also a lot of unneeded
>> parentheses.
> 
> Loop and NBD do it that way. I can retest with the original code now 
> that the underlying culprit is known and resubmit if that works.

I missed the __ part, yes it actually looks fine. blk_mq_end_request() will
both end the bio parts and free the request, your latter part does incremental
completes.

>>> @@ -775,6 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>   	char **back_ptr;
>>>   	int err, create_cow, *create_ptr;
>>>   	int fd;
>>> +	int data;
>>>   
>>>   	ubd_dev->openflags = ubd_dev->boot_openflags;
>>>   	create_cow = 0;
>>> @@ -829,6 +834,23 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>   		if(err < 0) goto error;
>>>   		ubd_dev->cow.fd = err;
>>>   	}
>>> +#ifdef CONFIG_BLK_DEV_UBD_DISCARD
>>> +	if (ubd_dev->cow.file != NULL)
>>> +		fd = ubd_dev->cow.fd;
>>> +	else
>>> +		fd = ubd_dev->fd;
>>> +	err = os_pread_file(fd, &data, sizeof(int), 0);
>>> +	if (err == sizeof(int)) {
>>> +		err = os_falloc_punch(fd, 0, sizeof(int));
>>> +		if (err == 0) {
>> It's not clear to me how it's safe to punch a 4-byte hole in the file as
>> a test?
> 
> A sizeof(int) sized hole is always implemented by simply writing zeroes. 
> There is no fs manipulation here.
> 
> The test reads the contents of the file, punches a hoile and writes the 
> content back if the hole was punched successfully so the file is in its 
> orginal state.
> 
> Life would have been much easier if fallocate did not return -EINVAL on 
> a 0 sized request. Unfortunately it does so any test has no choice but 
> to punch a hole for real and fix the damage after that.
> 
> The location is completely safe for most UMLs UBDs. The most common 
> method of building images is to format a device directly with ext2/3/4 - 
> there is no partition table. The first sector in ext2/3/4 if memory 
> serves me right is unused. Other fs - less so. Partition table - totally 
> safe as the boot code portion is unused.
> 
> In any case, the code restores the data to its pre-test state.

It's a bit nasty... You'd probably want to do a full sector though, the
fs isn't going to be issuing a trim for anything that small.

What happens if the system crashes before you successfully write back
the data you zeroed? Doesn't seem like a really safe way to do this.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 15:18       ` Jens Axboe
@ 2018-11-13 15:32         ` Anton Ivanov
  2018-11-13 15:49           ` Jens Axboe
  0 siblings, 1 reply; 25+ messages in thread
From: Anton Ivanov @ 2018-11-13 15:32 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch


On 11/13/18 3:18 PM, Jens Axboe wrote:
> On 11/13/18 7:04 AM, Anton Ivanov wrote:
>> On 13/11/2018 13:40, Jens Axboe wrote:
>>> On 11/13/18 4:59 AM, anton.ivanov@cambridgegreys.com wrote:
>>>> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
>>>> index 2b1aaf7755aa..e817fd4a4c30 100644
>>>> --- a/arch/um/drivers/Kconfig
>>>> +++ b/arch/um/drivers/Kconfig
>>>> @@ -1,4 +1,25 @@
>>>>    # SPDX-License-Identifier: GPL-2.0
>>>> +menu "UML Block Devices"
>>>> +
>>>> +config BLK_DEV_UBD
>>>> +	bool "UBD Block Device"
>>>> +	default y
>>>> +	help
>>>> +          User Mode Linux virtual block device driver
>>>> +
>>>> +config BLK_DEV_UBD_SYNC
>>>> +	bool "Use Synchronous mode for UBD"
>>>> +	default n
>>>> +	help
>>>> +          Perform all disk operations synchronously (extremely slow).
>>>> +
>>>> +config BLK_DEV_UBD_DISCARD
>>>> +	bool "Enable DISCARD/TRIM support in UBD"
>>>> +	default y
>>>> +	help
>>>> +          Enable discard/trim support. Requires host kernel 3.x or above and
>>>> +	  may not be supported on all host filesystems.
>>>> +endmenu
>>> kconfig entries are horrible ways to manage this. A module parameter
>>> would be better. The latter also gets rid of the various ifdefs.
>> I completely agree with you. Unfortunately, UBD for a number of reasons
>> cannot be a module at present. It contains "host" side and loading
>> something from the "inside" which breaks the hypervisor boundary is a
>> definitive no go.
>>
>> In order to implement the suggestion we need the concept of loadable
>> host side modules and while that is possible the likelihood of someone
>> doing all the work on this one at present is rather low.
> Module parameters work for builtin as well, then you just add
> ubd.discard=true to the kernel command line.
>
>>>> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
>>>> index e78f27f5ce88..4fd4cb68f033 100644
>>>> --- a/arch/um/drivers/ubd_kern.c
>>>> +++ b/arch/um/drivers/ubd_kern.c
>>>> @@ -513,9 +513,13 @@ static void ubd_handler(void)
>>>>    		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>>>>    			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>>>>    
>>>> -			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
>>>> -				__blk_mq_end_request(io_req->req, io_req->error);
>>>> -
>>>> +			if ((io_req->error) || (io_req->buffer == NULL)) {
>>>> +				blk_mq_end_request(io_req->req, io_req->error);
>>>> +			} else {
>>>> +				if (!blk_update_request(io_req->req, io_req->error, io_req->length)) {
>>>> +					__blk_mq_end_request(io_req->req, io_req->error);
>>>> +				}
>>>> +			}
>>>>    			kfree(io_req);
>>> This looks odd, why is this change necessary? Also a lot of unneeded
>>> parentheses.
>> Loop and NBD do it that way. I can retest with the original code now
>> that the underlying culprit is known and resubmit if that works.
> I missed the __ part, yes it actually looks fine. blk_mq_end_request() will
> both end the bio parts and free the request, your latter part does incremental
> completes.
>
>>>> @@ -775,6 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>>    	char **back_ptr;
>>>>    	int err, create_cow, *create_ptr;
>>>>    	int fd;
>>>> +	int data;
>>>>    
>>>>    	ubd_dev->openflags = ubd_dev->boot_openflags;
>>>>    	create_cow = 0;
>>>> @@ -829,6 +834,23 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>>    		if(err < 0) goto error;
>>>>    		ubd_dev->cow.fd = err;
>>>>    	}
>>>> +#ifdef CONFIG_BLK_DEV_UBD_DISCARD
>>>> +	if (ubd_dev->cow.file != NULL)
>>>> +		fd = ubd_dev->cow.fd;
>>>> +	else
>>>> +		fd = ubd_dev->fd;
>>>> +	err = os_pread_file(fd, &data, sizeof(int), 0);
>>>> +	if (err == sizeof(int)) {
>>>> +		err = os_falloc_punch(fd, 0, sizeof(int));
>>>> +		if (err == 0) {
>>> It's not clear to me how it's safe to punch a 4-byte hole in the file as
>>> a test?
>> A sizeof(int) sized hole is always implemented by simply writing zeroes.
>> There is no fs manipulation here.
>>
>> The test reads the contents of the file, punches a hoile and writes the
>> content back if the hole was punched successfully so the file is in its
>> orginal state.
>>
>> Life would have been much easier if fallocate did not return -EINVAL on
>> a 0 sized request. Unfortunately it does so any test has no choice but
>> to punch a hole for real and fix the damage after that.
>>
>> The location is completely safe for most UMLs UBDs. The most common
>> method of building images is to format a device directly with ext2/3/4 -
>> there is no partition table. The first sector in ext2/3/4 if memory
>> serves me right is unused. Other fs - less so. Partition table - totally
>> safe as the boot code portion is unused.
>>
>> In any case, the code restores the data to its pre-test state.
> It's a bit nasty... You'd probably want to do a full sector though, the
> fs isn't going to be issuing a trim for anything that small.
>
> What happens if the system crashes before you successfully write back
> the data you zeroed? Doesn't seem like a really safe way to do this.

There is no safe way to do this as far as I can see :)

So if any probing is to be done it is inherently unsafe courtesy of 
fallocate returning EINVAL on zero sized requests.

IMHO there is no point to do probing to start off with. Issuing a trim 
if the underlying fs does not support it does not result in anything 
visible to the user. I tried to trim ext4 images sitting on a MSDOS 
filesystem - there is no issues as the error is eaten somewhere inside 
the kernel. Nothing showing up to the user and no visible damage :)

However, Richard asked for a probe. That is how a reliable probe would 
look like. I am not happy with it either and I think it is too risky for 
a lot of scenarios. If we are to probe, we will have to keep some piece 
of similar code in.

-- 
Anton R. Ivanov

Cambridge Greys Limited, England and Wales company No 10273661
http://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 15:32         ` Anton Ivanov
@ 2018-11-13 15:49           ` Jens Axboe
  2018-11-13 15:52             ` Anton Ivanov
  0 siblings, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2018-11-13 15:49 UTC (permalink / raw)
  To: Anton Ivanov, linux-um; +Cc: richard, hch

On 11/13/18 8:32 AM, Anton Ivanov wrote:
> 
> On 11/13/18 3:18 PM, Jens Axboe wrote:
>> On 11/13/18 7:04 AM, Anton Ivanov wrote:
>>> On 13/11/2018 13:40, Jens Axboe wrote:
>>>> On 11/13/18 4:59 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
>>>>> index 2b1aaf7755aa..e817fd4a4c30 100644
>>>>> --- a/arch/um/drivers/Kconfig
>>>>> +++ b/arch/um/drivers/Kconfig
>>>>> @@ -1,4 +1,25 @@
>>>>>    # SPDX-License-Identifier: GPL-2.0
>>>>> +menu "UML Block Devices"
>>>>> +
>>>>> +config BLK_DEV_UBD
>>>>> +	bool "UBD Block Device"
>>>>> +	default y
>>>>> +	help
>>>>> +          User Mode Linux virtual block device driver
>>>>> +
>>>>> +config BLK_DEV_UBD_SYNC
>>>>> +	bool "Use Synchronous mode for UBD"
>>>>> +	default n
>>>>> +	help
>>>>> +          Perform all disk operations synchronously (extremely slow).
>>>>> +
>>>>> +config BLK_DEV_UBD_DISCARD
>>>>> +	bool "Enable DISCARD/TRIM support in UBD"
>>>>> +	default y
>>>>> +	help
>>>>> +          Enable discard/trim support. Requires host kernel 3.x or above and
>>>>> +	  may not be supported on all host filesystems.
>>>>> +endmenu
>>>> kconfig entries are horrible ways to manage this. A module parameter
>>>> would be better. The latter also gets rid of the various ifdefs.
>>> I completely agree with you. Unfortunately, UBD for a number of reasons
>>> cannot be a module at present. It contains "host" side and loading
>>> something from the "inside" which breaks the hypervisor boundary is a
>>> definitive no go.
>>>
>>> In order to implement the suggestion we need the concept of loadable
>>> host side modules and while that is possible the likelihood of someone
>>> doing all the work on this one at present is rather low.
>> Module parameters work for builtin as well, then you just add
>> ubd.discard=true to the kernel command line.
>>
>>>>> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
>>>>> index e78f27f5ce88..4fd4cb68f033 100644
>>>>> --- a/arch/um/drivers/ubd_kern.c
>>>>> +++ b/arch/um/drivers/ubd_kern.c
>>>>> @@ -513,9 +513,13 @@ static void ubd_handler(void)
>>>>>    		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>>>>>    			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>>>>>    
>>>>> -			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
>>>>> -				__blk_mq_end_request(io_req->req, io_req->error);
>>>>> -
>>>>> +			if ((io_req->error) || (io_req->buffer == NULL)) {
>>>>> +				blk_mq_end_request(io_req->req, io_req->error);
>>>>> +			} else {
>>>>> +				if (!blk_update_request(io_req->req, io_req->error, io_req->length)) {
>>>>> +					__blk_mq_end_request(io_req->req, io_req->error);
>>>>> +				}
>>>>> +			}
>>>>>    			kfree(io_req);
>>>> This looks odd, why is this change necessary? Also a lot of unneeded
>>>> parentheses.
>>> Loop and NBD do it that way. I can retest with the original code now
>>> that the underlying culprit is known and resubmit if that works.
>> I missed the __ part, yes it actually looks fine. blk_mq_end_request() will
>> both end the bio parts and free the request, your latter part does incremental
>> completes.
>>
>>>>> @@ -775,6 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>>>    	char **back_ptr;
>>>>>    	int err, create_cow, *create_ptr;
>>>>>    	int fd;
>>>>> +	int data;
>>>>>    
>>>>>    	ubd_dev->openflags = ubd_dev->boot_openflags;
>>>>>    	create_cow = 0;
>>>>> @@ -829,6 +834,23 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>>>    		if(err < 0) goto error;
>>>>>    		ubd_dev->cow.fd = err;
>>>>>    	}
>>>>> +#ifdef CONFIG_BLK_DEV_UBD_DISCARD
>>>>> +	if (ubd_dev->cow.file != NULL)
>>>>> +		fd = ubd_dev->cow.fd;
>>>>> +	else
>>>>> +		fd = ubd_dev->fd;
>>>>> +	err = os_pread_file(fd, &data, sizeof(int), 0);
>>>>> +	if (err == sizeof(int)) {
>>>>> +		err = os_falloc_punch(fd, 0, sizeof(int));
>>>>> +		if (err == 0) {
>>>> It's not clear to me how it's safe to punch a 4-byte hole in the file as
>>>> a test?
>>> A sizeof(int) sized hole is always implemented by simply writing zeroes.
>>> There is no fs manipulation here.
>>>
>>> The test reads the contents of the file, punches a hoile and writes the
>>> content back if the hole was punched successfully so the file is in its
>>> orginal state.
>>>
>>> Life would have been much easier if fallocate did not return -EINVAL on
>>> a 0 sized request. Unfortunately it does so any test has no choice but
>>> to punch a hole for real and fix the damage after that.
>>>
>>> The location is completely safe for most UMLs UBDs. The most common
>>> method of building images is to format a device directly with ext2/3/4 -
>>> there is no partition table. The first sector in ext2/3/4 if memory
>>> serves me right is unused. Other fs - less so. Partition table - totally
>>> safe as the boot code portion is unused.
>>>
>>> In any case, the code restores the data to its pre-test state.
>> It's a bit nasty... You'd probably want to do a full sector though, the
>> fs isn't going to be issuing a trim for anything that small.
>>
>> What happens if the system crashes before you successfully write back
>> the data you zeroed? Doesn't seem like a really safe way to do this.
> 
> There is no safe way to do this as far as I can see :)
> 
> So if any probing is to be done it is inherently unsafe courtesy of 
> fallocate returning EINVAL on zero sized requests.
> 
> IMHO there is no point to do probing to start off with. Issuing a trim 
> if the underlying fs does not support it does not result in anything 
> visible to the user. I tried to trim ext4 images sitting on a MSDOS 
> filesystem - there is no issues as the error is eaten somewhere inside 
> the kernel. Nothing showing up to the user and no visible damage :)
> 
> However, Richard asked for a probe. That is how a reliable probe would 
> look like. I am not happy with it either and I think it is too risky for 
> a lot of scenarios. If we are to probe, we will have to keep some piece 
> of similar code in.

So here's what I suggest - default to discard being available, don't
offer WRITE_ZEROES support. Don't set the flag that tells the block
layer that discards guarantees zeroes. Have an internal flag that is set
when discards are enabled. If a discard fails, then clear your internal
flag and the queue support. You must still expect discards to arrive, if
they do and your internal discard flag is not set, you just end them,
not in error.

That'll just treat discards as hints, and you don't have to have any
broken probe logic.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 15:49           ` Jens Axboe
@ 2018-11-13 15:52             ` Anton Ivanov
  0 siblings, 0 replies; 25+ messages in thread
From: Anton Ivanov @ 2018-11-13 15:52 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch

On 13/11/2018 15:49, Jens Axboe wrote:
> On 11/13/18 8:32 AM, Anton Ivanov wrote:
>> On 11/13/18 3:18 PM, Jens Axboe wrote:
>>> On 11/13/18 7:04 AM, Anton Ivanov wrote:
>>>> On 13/11/2018 13:40, Jens Axboe wrote:
>>>>> On 11/13/18 4:59 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>>> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
>>>>>> index 2b1aaf7755aa..e817fd4a4c30 100644
>>>>>> --- a/arch/um/drivers/Kconfig
>>>>>> +++ b/arch/um/drivers/Kconfig
>>>>>> @@ -1,4 +1,25 @@
>>>>>>     # SPDX-License-Identifier: GPL-2.0
>>>>>> +menu "UML Block Devices"
>>>>>> +
>>>>>> +config BLK_DEV_UBD
>>>>>> +	bool "UBD Block Device"
>>>>>> +	default y
>>>>>> +	help
>>>>>> +          User Mode Linux virtual block device driver
>>>>>> +
>>>>>> +config BLK_DEV_UBD_SYNC
>>>>>> +	bool "Use Synchronous mode for UBD"
>>>>>> +	default n
>>>>>> +	help
>>>>>> +          Perform all disk operations synchronously (extremely slow).
>>>>>> +
>>>>>> +config BLK_DEV_UBD_DISCARD
>>>>>> +	bool "Enable DISCARD/TRIM support in UBD"
>>>>>> +	default y
>>>>>> +	help
>>>>>> +          Enable discard/trim support. Requires host kernel 3.x or above and
>>>>>> +	  may not be supported on all host filesystems.
>>>>>> +endmenu
>>>>> kconfig entries are horrible ways to manage this. A module parameter
>>>>> would be better. The latter also gets rid of the various ifdefs.
>>>> I completely agree with you. Unfortunately, UBD for a number of reasons
>>>> cannot be a module at present. It contains "host" side and loading
>>>> something from the "inside" which breaks the hypervisor boundary is a
>>>> definitive no go.
>>>>
>>>> In order to implement the suggestion we need the concept of loadable
>>>> host side modules and while that is possible the likelihood of someone
>>>> doing all the work on this one at present is rather low.
>>> Module parameters work for builtin as well, then you just add
>>> ubd.discard=true to the kernel command line.
>>>
>>>>>> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
>>>>>> index e78f27f5ce88..4fd4cb68f033 100644
>>>>>> --- a/arch/um/drivers/ubd_kern.c
>>>>>> +++ b/arch/um/drivers/ubd_kern.c
>>>>>> @@ -513,9 +513,13 @@ static void ubd_handler(void)
>>>>>>     		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>>>>>>     			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>>>>>>     
>>>>>> -			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
>>>>>> -				__blk_mq_end_request(io_req->req, io_req->error);
>>>>>> -
>>>>>> +			if ((io_req->error) || (io_req->buffer == NULL)) {
>>>>>> +				blk_mq_end_request(io_req->req, io_req->error);
>>>>>> +			} else {
>>>>>> +				if (!blk_update_request(io_req->req, io_req->error, io_req->length)) {
>>>>>> +					__blk_mq_end_request(io_req->req, io_req->error);
>>>>>> +				}
>>>>>> +			}
>>>>>>     			kfree(io_req);
>>>>> This looks odd, why is this change necessary? Also a lot of unneeded
>>>>> parentheses.
>>>> Loop and NBD do it that way. I can retest with the original code now
>>>> that the underlying culprit is known and resubmit if that works.
>>> I missed the __ part, yes it actually looks fine. blk_mq_end_request() will
>>> both end the bio parts and free the request, your latter part does incremental
>>> completes.
>>>
>>>>>> @@ -775,6 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>>>>     	char **back_ptr;
>>>>>>     	int err, create_cow, *create_ptr;
>>>>>>     	int fd;
>>>>>> +	int data;
>>>>>>     
>>>>>>     	ubd_dev->openflags = ubd_dev->boot_openflags;
>>>>>>     	create_cow = 0;
>>>>>> @@ -829,6 +834,23 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>>>>     		if(err < 0) goto error;
>>>>>>     		ubd_dev->cow.fd = err;
>>>>>>     	}
>>>>>> +#ifdef CONFIG_BLK_DEV_UBD_DISCARD
>>>>>> +	if (ubd_dev->cow.file != NULL)
>>>>>> +		fd = ubd_dev->cow.fd;
>>>>>> +	else
>>>>>> +		fd = ubd_dev->fd;
>>>>>> +	err = os_pread_file(fd, &data, sizeof(int), 0);
>>>>>> +	if (err == sizeof(int)) {
>>>>>> +		err = os_falloc_punch(fd, 0, sizeof(int));
>>>>>> +		if (err == 0) {
>>>>> It's not clear to me how it's safe to punch a 4-byte hole in the file as
>>>>> a test?
>>>> A sizeof(int) sized hole is always implemented by simply writing zeroes.
>>>> There is no fs manipulation here.
>>>>
>>>> The test reads the contents of the file, punches a hoile and writes the
>>>> content back if the hole was punched successfully so the file is in its
>>>> orginal state.
>>>>
>>>> Life would have been much easier if fallocate did not return -EINVAL on
>>>> a 0 sized request. Unfortunately it does so any test has no choice but
>>>> to punch a hole for real and fix the damage after that.
>>>>
>>>> The location is completely safe for most UMLs UBDs. The most common
>>>> method of building images is to format a device directly with ext2/3/4 -
>>>> there is no partition table. The first sector in ext2/3/4 if memory
>>>> serves me right is unused. Other fs - less so. Partition table - totally
>>>> safe as the boot code portion is unused.
>>>>
>>>> In any case, the code restores the data to its pre-test state.
>>> It's a bit nasty... You'd probably want to do a full sector though, the
>>> fs isn't going to be issuing a trim for anything that small.
>>>
>>> What happens if the system crashes before you successfully write back
>>> the data you zeroed? Doesn't seem like a really safe way to do this.
>> There is no safe way to do this as far as I can see :)
>>
>> So if any probing is to be done it is inherently unsafe courtesy of
>> fallocate returning EINVAL on zero sized requests.
>>
>> IMHO there is no point to do probing to start off with. Issuing a trim
>> if the underlying fs does not support it does not result in anything
>> visible to the user. I tried to trim ext4 images sitting on a MSDOS
>> filesystem - there is no issues as the error is eaten somewhere inside
>> the kernel. Nothing showing up to the user and no visible damage :)
>>
>> However, Richard asked for a probe. That is how a reliable probe would
>> look like. I am not happy with it either and I think it is too risky for
>> a lot of scenarios. If we are to probe, we will have to keep some piece
>> of similar code in.
> So here's what I suggest - default to discard being available, don't
> offer WRITE_ZEROES support. Don't set the flag that tells the block
> layer that discards guarantees zeroes. Have an internal flag that is set
> when discards are enabled. If a discard fails, then clear your internal
> flag and the queue support. You must still expect discards to arrive, if
> they do and your internal discard flag is not set, you just end them,
> not in error.
>
> That'll just treat discards as hints, and you don't have to have any
> broken probe logic.
>
Sounds good. I will implement this round of comments, retest and 
resubmit the series tomorrow.

-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/4] um: Clean-up command processing in UML UBD driver
  2018-11-13 13:34   ` Jens Axboe
@ 2018-11-14 15:28     ` Christoph Hellwig
  2018-11-14 15:31       ` Anton Ivanov
  0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2018-11-14 15:28 UTC (permalink / raw)
  To: Jens Axboe; +Cc: richard, linux-um, hch, anton.ivanov

On Tue, Nov 13, 2018 at 06:34:31AM -0700, Jens Axboe wrote:
> > +	/* operations with bio_vec arguments */
> > +	case REQ_OP_READ:
> > +	case REQ_OP_WRITE:
> > +		{
> > +			struct req_iterator iter;
> > +			struct bio_vec bvec;
> > +			u64 off = (u64)blk_rq_pos(req) << 9;
> > +
> > +			rq_for_each_segment(bvec, req, iter) {
> > +				ret = ubd_queue_one_vec(hctx, req, off, &bvec);
> > +				if (ret < 0)
> > +					goto out;
> > +				off += bvec.bv_len;
> > +			}
> >  		}
> > +		break;
> 
> This indentation is wrong/awkward. The usual style is:

And for extra point just split the code into a separate helper,
avoiding the whole switch scoping issue entirely.

_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 11:59 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
  2018-11-13 13:40   ` Jens Axboe
@ 2018-11-14 15:30   ` Christoph Hellwig
  2018-11-14 15:35     ` Anton Ivanov
  1 sibling, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2018-11-14 15:30 UTC (permalink / raw)
  To: anton.ivanov; +Cc: axboe, richard, linux-um, hch

Discards are defined to be hints.  So if your fileystems return
-EOPTNOTSUPP you just stop passing them to the file system and
complete them from ->queue_rq.

Write Zeroes isn't.  So if you really want to support it you
should manually write zeroes to the file as a fallback.  But do you
have a real use case for Write Zeroes to start with?

_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/4] um: Clean-up command processing in UML UBD driver
  2018-11-14 15:28     ` Christoph Hellwig
@ 2018-11-14 15:31       ` Anton Ivanov
  2018-11-14 15:33         ` Christoph Hellwig
  0 siblings, 1 reply; 25+ messages in thread
From: Anton Ivanov @ 2018-11-14 15:31 UTC (permalink / raw)
  To: Christoph Hellwig, Jens Axboe; +Cc: richard, linux-um

On 14/11/2018 15:28, Christoph Hellwig wrote:
> On Tue, Nov 13, 2018 at 06:34:31AM -0700, Jens Axboe wrote:
>>> +	/* operations with bio_vec arguments */
>>> +	case REQ_OP_READ:
>>> +	case REQ_OP_WRITE:
>>> +		{
>>> +			struct req_iterator iter;
>>> +			struct bio_vec bvec;
>>> +			u64 off = (u64)blk_rq_pos(req) << 9;
>>> +
>>> +			rq_for_each_segment(bvec, req, iter) {
>>> +				ret = ubd_queue_one_vec(hctx, req, off, &bvec);
>>> +				if (ret < 0)
>>> +					goto out;
>>> +				off += bvec.bv_len;
>>> +			}
>>>   		}
>>> +		break;
>> This indentation is wrong/awkward. The usual style is:
> And for extra point just split the code into a separate helper,
> avoiding the whole switch scoping issue entirely.
>
I thought about it :) And forgot as I was fighting with the origin of 
the crashes at the time.

Do you want me to reissue the series or I can do an incremental?

-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver
  2018-11-13 11:59 ` [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
  2018-11-13 13:33   ` Jens Axboe
@ 2018-11-14 15:32   ` Christoph Hellwig
  2018-11-14 17:00     ` Jens Axboe
  1 sibling, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2018-11-14 15:32 UTC (permalink / raw)
  To: anton.ivanov; +Cc: axboe, richard, linux-um, hch

> +#define UBD_SECTOR_SIZE (1 << 9)

This can just use SECTOR_SIZE I guess.

> +			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
> +				__blk_mq_end_request(io_req->req, io_req->error);

The driver reall should be using blk_mq_end_request instead of playing
games with blk_update_request and __blk_mq_end_request.

_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/4] um: Clean-up command processing in UML UBD driver
  2018-11-14 15:31       ` Anton Ivanov
@ 2018-11-14 15:33         ` Christoph Hellwig
  0 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2018-11-14 15:33 UTC (permalink / raw)
  To: Anton Ivanov; +Cc: Jens Axboe, richard, linux-um, Christoph Hellwig

On Wed, Nov 14, 2018 at 03:31:01PM +0000, Anton Ivanov wrote:
> I thought about it :) And forgot as I was fighting with the origin of the 
> crashes at the time.
>
> Do you want me to reissue the series or I can do an incremental?

Based on the other comments I had a resend might be worthwhile.

_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-14 15:30   ` Christoph Hellwig
@ 2018-11-14 15:35     ` Anton Ivanov
  0 siblings, 0 replies; 25+ messages in thread
From: Anton Ivanov @ 2018-11-14 15:35 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: axboe, richard, linux-um

On 14/11/2018 15:30, Christoph Hellwig wrote:
> Discards are defined to be hints.  So if your fileystems return
> -EOPTNOTSUPP you just stop passing them to the file system and
> complete them from ->queue_rq.
Ack.
>
> Write Zeroes isn't.  So if you really want to support it you
> should manually write zeroes to the file as a fallback.

Hmm... Loop treats them as the same thing. I was looking at what they 
have done.

>   But do you
> have a real use case for Write Zeroes to start with?

I am not aware of.

There is a real case just for trim for now and I was using loop and nbd 
as a "guide" on what needs to be done. Loop has both DISCARD and ZERO.

Several other ops in the API looked interesting as well, but I have not 
seen them implemented anywhere so I skipped them for now.


-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver
  2018-11-14 15:32   ` Christoph Hellwig
@ 2018-11-14 17:00     ` Jens Axboe
  2018-11-14 17:04       ` Anton Ivanov
  0 siblings, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2018-11-14 17:00 UTC (permalink / raw)
  To: Christoph Hellwig, anton.ivanov; +Cc: richard, linux-um

On 11/14/18 8:32 AM, Christoph Hellwig wrote:
>> +#define UBD_SECTOR_SIZE (1 << 9)
> 
> This can just use SECTOR_SIZE I guess.
> 
>> +			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
>> +				__blk_mq_end_request(io_req->req, io_req->error);
> 
> The driver reall should be using blk_mq_end_request instead of playing
> games with blk_update_request and __blk_mq_end_request.

It's doing partial completions though, so it can't use blk_mq_end_request()
directly. I guess it could sum it up and skip it in the loop, but honestly
I think this is cleaner then.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver
  2018-11-14 17:00     ` Jens Axboe
@ 2018-11-14 17:04       ` Anton Ivanov
  0 siblings, 0 replies; 25+ messages in thread
From: Anton Ivanov @ 2018-11-14 17:04 UTC (permalink / raw)
  To: Jens Axboe, Christoph Hellwig; +Cc: richard, linux-um


On 11/14/18 5:00 PM, Jens Axboe wrote:
> On 11/14/18 8:32 AM, Christoph Hellwig wrote:
>>> +#define UBD_SECTOR_SIZE (1 << 9)
>> This can just use SECTOR_SIZE I guess.
>>
>>> +			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
>>> +				__blk_mq_end_request(io_req->req, io_req->error);
>> The driver reall should be using blk_mq_end_request instead of playing
>> games with blk_update_request and __blk_mq_end_request.
> It's doing partial completions though, so it can't use blk_mq_end_request()
> directly. I guess it could sum it up and skip it in the loop, but honestly
> I think this is cleaner then.
Concur. I had a look at the other end/complete functions while chasing 
the root cause of the spurious hangs/aborts and I did not see anything 
else I can use. I may have missed something of course.
>
-- 
Anton R. Ivanov

Cambridge Greys Limited, England and Wales company No 10273661
http://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-14 17:09 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
@ 2018-11-14 18:30   ` Anton Ivanov
  0 siblings, 0 replies; 25+ messages in thread
From: Anton Ivanov @ 2018-11-14 18:30 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch


On 11/14/18 5:09 PM, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>
> Support for DISCARD and WRITE_ZEROES in the ubd driver using
> fallocate.
>
> DISCARD is enabled by default and can be disabled using a new
> UBD command line flag.
>
> If the underlying fs on which the UBD image is stored does not
> support DISCARD the support for both DISCARD and WRITE_ZEROES
> is turned off.
>
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>   arch/um/drivers/ubd_kern.c  | 66 +++++++++++++++++++++++++++++++++++++--------
>   arch/um/include/shared/os.h |  1 +
>   arch/um/os-Linux/file.c     | 10 +++++++
>   3 files changed, 66 insertions(+), 11 deletions(-)
>
> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
> index 1672e3c49bfb..e85c7f7fda6f 100644
> --- a/arch/um/drivers/ubd_kern.c
> +++ b/arch/um/drivers/ubd_kern.c
> @@ -154,6 +154,7 @@ struct ubd {
>   	struct openflags openflags;
>   	unsigned shared:1;
>   	unsigned no_cow:1;
> +	unsigned no_trim:1;
>   	struct cow cow;
>   	struct platform_device pdev;
>   	struct request_queue *queue;
> @@ -177,6 +178,7 @@ struct ubd {
>   	.boot_openflags =	OPEN_FLAGS, \
>   	.openflags =		OPEN_FLAGS, \
>   	.no_cow =               0, \
> +	.no_trim =		0, \
>   	.shared =		0, \
>   	.cow =			DEFAULT_COW, \
>   	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
> @@ -323,7 +325,7 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
>   		*index_out = n;
>   
>   	err = -EINVAL;
> -	for (i = 0; i < sizeof("rscd="); i++) {
> +	for (i = 0; i < sizeof("rscdt="); i++) {
>   		switch (*str) {
>   		case 'r':
>   			flags.w = 0;
> @@ -337,12 +339,15 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
>   		case 'c':
>   			ubd_dev->shared = 1;
>   			break;
> +		case 't':
> +			ubd_dev->no_trim = 1;
> +			break;
>   		case '=':
>   			str++;
>   			goto break_loop;
>   		default:
>   			*error_out = "Expected '=' or flag letter "
> -				"(r, s, c, or d)";
> +				"(r, s, c, t or d)";
>   			goto out;
>   		}
>   		str++;
> @@ -415,6 +420,7 @@ __uml_help(ubd_setup,
>   "    'c' will cause the device to be treated as being shared between multiple\n"
>   "    UMLs and file locking will be turned off - this is appropriate for a\n"
>   "    cluster filesystem and inappropriate at almost all other times.\n\n"
> +"    't' will disable trim/discard support on the device (enabled by default).\n\n"
>   );
>   
>   static int udb_setup(char *str)
> @@ -513,9 +519,17 @@ static void ubd_handler(void)
>   		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>   			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>   
> -			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
> -				__blk_mq_end_request(io_req->req, io_req->error);
> -
> +			if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
> +				blk_queue_max_discard_sectors(io_req->req->q, 0);
> +				blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
> +				blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
> +			}
> +			if ((io_req->error) || (io_req->buffer == NULL))
> +				blk_mq_end_request(io_req->req, io_req->error);
> +			else {
> +				if (!blk_update_request(io_req->req, io_req->error, io_req->length))
> +					__blk_mq_end_request(io_req->req, io_req->error);
> +			}
>   			kfree(io_req);
>   		}
>   	}
> @@ -829,6 +843,14 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>   		if(err < 0) goto error;
>   		ubd_dev->cow.fd = err;
>   	}
> +	if (ubd_dev->no_trim == 0) {
> +		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
> +		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
> +		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
> +		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
> +		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
> +		blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);


Sorry, messed it up when reapplying,  this should have set the zero 
limits as well, not repeat the discard ones.

A revised version will be resent shortly.

A



> +	}
>   	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
>   	return 0;
>    error:
> @@ -1372,6 +1394,10 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
>   	case REQ_OP_WRITE:
>   		ret = queue_rw_req(hctx, req);
>   		break;
> +	case REQ_OP_DISCARD:
> +	case REQ_OP_WRITE_ZEROES:
> +		ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
> +		break;
>   	default:
>   		WARN_ON_ONCE(1);
>   		res = BLK_STS_NOTSUPP;
> @@ -1463,7 +1489,7 @@ static int update_bitmap(struct io_thread_req *req)
>   
>   	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
>   			  sizeof(req->bitmap_words), req->cow_offset);
> -	if(n != sizeof(req->bitmap_words))
> +	if (n != sizeof(req->bitmap_words))
>   		return map_error(-n);
>   
>   	return map_error(0);
> @@ -1471,11 +1497,13 @@ static int update_bitmap(struct io_thread_req *req)
>   
>   static void do_io(struct io_thread_req *req)
>   {
> -	char *buf;
> +	char *buf = NULL;
>   	unsigned long len;
>   	int n, nsectors, start, end, bit;
>   	__u64 off;
>   
> +	/* FLUSH is really a special case, we cannot "case" it with others */
> +
>   	if (req_op(req->req) == REQ_OP_FLUSH) {
>   		/* fds[0] is always either the rw image or our cow file */
>   		req->error = map_error(-os_sync_file(req->fds[0]));
> @@ -1495,26 +1523,42 @@ static void do_io(struct io_thread_req *req)
>   		off = req->offset + req->offsets[bit] +
>   			start * req->sectorsize;
>   		len = (end - start) * req->sectorsize;
> -		buf = &req->buffer[start * req->sectorsize];
> +		if (req->buffer != NULL)
> +			buf = &req->buffer[start * req->sectorsize];
>   
> -		if (req_op(req->req) == REQ_OP_READ) {
> +		switch (req_op(req->req)) {
> +		case REQ_OP_READ:
>   			n = 0;
>   			do {
>   				buf = &buf[n];
>   				len -= n;
>   				n = os_pread_file(req->fds[bit], buf, len, off);
> -				if(n < 0){
> +				if (n < 0) {
>   					req->error = map_error(-n);
>   					return;
>   				}
>   			} while((n < len) && (n != 0));
>   			if (n < len) memset(&buf[n], 0, len - n);
> -		} else {
> +			break;
> +		case REQ_OP_WRITE:
>   			n = os_pwrite_file(req->fds[bit], buf, len, off);
>   			if(n != len){
>   				req->error = map_error(-n);
>   				return;
>   			}
> +			break;
> +		case REQ_OP_DISCARD:
> +		case REQ_OP_WRITE_ZEROES:
> +			n = os_falloc_punch(req->fds[bit], off, len);
> +			if (n) {
> +				req->error = map_error(-n);
> +				return;
> +			}
> +			break;
> +		default:
> +			WARN_ON_ONCE(1);
> +			req->error = BLK_STS_NOTSUPP;
> +			return;
>   		}
>   
>   		start = end;
> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
> index 048ae37eb5aa..ebf23012a59b 100644
> --- a/arch/um/include/shared/os.h
> +++ b/arch/um/include/shared/os.h
> @@ -175,6 +175,7 @@ extern int os_fchange_dir(int fd);
>   extern unsigned os_major(unsigned long long dev);
>   extern unsigned os_minor(unsigned long long dev);
>   extern unsigned long long os_makedev(unsigned major, unsigned minor);
> +extern int os_falloc_punch(int fd, unsigned long long offset, int count);
>   
>   /* start_up.c */
>   extern void os_early_checks(void);
> diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
> index c0197097c86e..f25b110d4e70 100644
> --- a/arch/um/os-Linux/file.c
> +++ b/arch/um/os-Linux/file.c
> @@ -610,3 +610,13 @@ unsigned long long os_makedev(unsigned major, unsigned minor)
>   {
>   	return makedev(major, minor);
>   }
> +
> +int os_falloc_punch(int fd, unsigned long long offset, int len)
> +{
> +	int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len);
> +
> +	if (n < 0)
> +		return -errno;
> +	return n;
> +}
> +

-- 
Anton R. Ivanov

Cambridge Greys Limited, England and Wales company No 10273661
http://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-14 17:09 [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
@ 2018-11-14 17:09 ` anton.ivanov
  2018-11-14 18:30   ` Anton Ivanov
  0 siblings, 1 reply; 25+ messages in thread
From: anton.ivanov @ 2018-11-14 17:09 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Support for DISCARD and WRITE_ZEROES in the ubd driver using
fallocate.

DISCARD is enabled by default and can be disabled using a new
UBD command line flag.

If the underlying fs on which the UBD image is stored does not
support DISCARD the support for both DISCARD and WRITE_ZEROES
is turned off.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/ubd_kern.c  | 66 +++++++++++++++++++++++++++++++++++++--------
 arch/um/include/shared/os.h |  1 +
 arch/um/os-Linux/file.c     | 10 +++++++
 3 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 1672e3c49bfb..e85c7f7fda6f 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -154,6 +154,7 @@ struct ubd {
 	struct openflags openflags;
 	unsigned shared:1;
 	unsigned no_cow:1;
+	unsigned no_trim:1;
 	struct cow cow;
 	struct platform_device pdev;
 	struct request_queue *queue;
@@ -177,6 +178,7 @@ struct ubd {
 	.boot_openflags =	OPEN_FLAGS, \
 	.openflags =		OPEN_FLAGS, \
 	.no_cow =               0, \
+	.no_trim =		0, \
 	.shared =		0, \
 	.cow =			DEFAULT_COW, \
 	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
@@ -323,7 +325,7 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
 		*index_out = n;
 
 	err = -EINVAL;
-	for (i = 0; i < sizeof("rscd="); i++) {
+	for (i = 0; i < sizeof("rscdt="); i++) {
 		switch (*str) {
 		case 'r':
 			flags.w = 0;
@@ -337,12 +339,15 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
 		case 'c':
 			ubd_dev->shared = 1;
 			break;
+		case 't':
+			ubd_dev->no_trim = 1;
+			break;
 		case '=':
 			str++;
 			goto break_loop;
 		default:
 			*error_out = "Expected '=' or flag letter "
-				"(r, s, c, or d)";
+				"(r, s, c, t or d)";
 			goto out;
 		}
 		str++;
@@ -415,6 +420,7 @@ __uml_help(ubd_setup,
 "    'c' will cause the device to be treated as being shared between multiple\n"
 "    UMLs and file locking will be turned off - this is appropriate for a\n"
 "    cluster filesystem and inappropriate at almost all other times.\n\n"
+"    't' will disable trim/discard support on the device (enabled by default).\n\n"
 );
 
 static int udb_setup(char *str)
@@ -513,9 +519,17 @@ static void ubd_handler(void)
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 			struct io_thread_req *io_req = (*irq_req_buffer)[count];
 
-			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
-				__blk_mq_end_request(io_req->req, io_req->error);
-
+			if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
+				blk_queue_max_discard_sectors(io_req->req->q, 0);
+				blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
+				blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
+			}
+			if ((io_req->error) || (io_req->buffer == NULL))
+				blk_mq_end_request(io_req->req, io_req->error);
+			else {
+				if (!blk_update_request(io_req->req, io_req->error, io_req->length))
+					__blk_mq_end_request(io_req->req, io_req->error);
+			}
 			kfree(io_req);
 		}
 	}
@@ -829,6 +843,14 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 		if(err < 0) goto error;
 		ubd_dev->cow.fd = err;
 	}
+	if (ubd_dev->no_trim == 0) {
+		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
+		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
+		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
+		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
+		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
+	}
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 	return 0;
  error:
@@ -1372,6 +1394,10 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 	case REQ_OP_WRITE:
 		ret = queue_rw_req(hctx, req);
 		break;
+	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
+		ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		res = BLK_STS_NOTSUPP;
@@ -1463,7 +1489,7 @@ static int update_bitmap(struct io_thread_req *req)
 
 	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
 			  sizeof(req->bitmap_words), req->cow_offset);
-	if(n != sizeof(req->bitmap_words))
+	if (n != sizeof(req->bitmap_words))
 		return map_error(-n);
 
 	return map_error(0);
@@ -1471,11 +1497,13 @@ static int update_bitmap(struct io_thread_req *req)
 
 static void do_io(struct io_thread_req *req)
 {
-	char *buf;
+	char *buf = NULL;
 	unsigned long len;
 	int n, nsectors, start, end, bit;
 	__u64 off;
 
+	/* FLUSH is really a special case, we cannot "case" it with others */
+
 	if (req_op(req->req) == REQ_OP_FLUSH) {
 		/* fds[0] is always either the rw image or our cow file */
 		req->error = map_error(-os_sync_file(req->fds[0]));
@@ -1495,26 +1523,42 @@ static void do_io(struct io_thread_req *req)
 		off = req->offset + req->offsets[bit] +
 			start * req->sectorsize;
 		len = (end - start) * req->sectorsize;
-		buf = &req->buffer[start * req->sectorsize];
+		if (req->buffer != NULL)
+			buf = &req->buffer[start * req->sectorsize];
 
-		if (req_op(req->req) == REQ_OP_READ) {
+		switch (req_op(req->req)) {
+		case REQ_OP_READ:
 			n = 0;
 			do {
 				buf = &buf[n];
 				len -= n;
 				n = os_pread_file(req->fds[bit], buf, len, off);
-				if(n < 0){
+				if (n < 0) {
 					req->error = map_error(-n);
 					return;
 				}
 			} while((n < len) && (n != 0));
 			if (n < len) memset(&buf[n], 0, len - n);
-		} else {
+			break;
+		case REQ_OP_WRITE:
 			n = os_pwrite_file(req->fds[bit], buf, len, off);
 			if(n != len){
 				req->error = map_error(-n);
 				return;
 			}
+			break;
+		case REQ_OP_DISCARD:
+		case REQ_OP_WRITE_ZEROES:
+			n = os_falloc_punch(req->fds[bit], off, len);
+			if (n) {
+				req->error = map_error(-n);
+				return;
+			}
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			req->error = BLK_STS_NOTSUPP;
+			return;
 		}
 
 		start = end;
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 048ae37eb5aa..ebf23012a59b 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -175,6 +175,7 @@ extern int os_fchange_dir(int fd);
 extern unsigned os_major(unsigned long long dev);
 extern unsigned os_minor(unsigned long long dev);
 extern unsigned long long os_makedev(unsigned major, unsigned minor);
+extern int os_falloc_punch(int fd, unsigned long long offset, int count);
 
 /* start_up.c */
 extern void os_early_checks(void);
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index c0197097c86e..f25b110d4e70 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -610,3 +610,13 @@ unsigned long long os_makedev(unsigned major, unsigned minor)
 {
 	return makedev(major, minor);
 }
+
+int os_falloc_punch(int fd, unsigned long long offset, int len)
+{
+	int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
+
-- 
2.11.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-14  8:10 [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
@ 2018-11-14  8:10 ` anton.ivanov
  0 siblings, 0 replies; 25+ messages in thread
From: anton.ivanov @ 2018-11-14  8:10 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Support for DISCARD and WRITE_ZEROES in the ubd driver using
fallocate.

DISCARD is enabled by default and can be disabled using a new
UBD command line flag.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/ubd_kern.c  | 59 ++++++++++++++++++++++++++++++++++++---------
 arch/um/include/shared/os.h |  1 +
 arch/um/os-Linux/file.c     | 10 ++++++++
 3 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index c08d162f5dc9..8e1179143df6 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -155,6 +155,7 @@ struct ubd {
 	struct openflags openflags;
 	unsigned shared:1;
 	unsigned no_cow:1;
+	unsigned no_trim:1;
 	struct cow cow;
 	struct platform_device pdev;
 	struct request_queue *queue;
@@ -178,6 +179,7 @@ struct ubd {
 	.boot_openflags =	OPEN_FLAGS, \
 	.openflags =		OPEN_FLAGS, \
 	.no_cow =               0, \
+	.no_trim =		0, \
 	.shared =		0, \
 	.cow =			DEFAULT_COW, \
 	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
@@ -324,7 +326,7 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
 		*index_out = n;
 
 	err = -EINVAL;
-	for (i = 0; i < sizeof("rscd="); i++) {
+	for (i = 0; i < sizeof("rscdt="); i++) {
 		switch (*str) {
 		case 'r':
 			flags.w = 0;
@@ -338,12 +340,15 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
 		case 'c':
 			ubd_dev->shared = 1;
 			break;
+		case 't':
+			ubd_dev->no_trim = 1;
+			break;
 		case '=':
 			str++;
 			goto break_loop;
 		default:
 			*error_out = "Expected '=' or flag letter "
-				"(r, s, c, or d)";
+				"(r, s, c, t or d)";
 			goto out;
 		}
 		str++;
@@ -416,6 +421,7 @@ __uml_help(ubd_setup,
 "    'c' will cause the device to be treated as being shared between multiple\n"
 "    UMLs and file locking will be turned off - this is appropriate for a\n"
 "    cluster filesystem and inappropriate at almost all other times.\n\n"
+"    't' will disable trim/discard support on the device (enabled by default).\n\n"
 );
 
 static int udb_setup(char *str)
@@ -514,9 +520,12 @@ static void ubd_handler(void)
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 			struct io_thread_req *io_req = (*irq_req_buffer)[count];
 
-			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
-				__blk_mq_end_request(io_req->req, io_req->error);
-
+			if ((io_req->error) || (io_req->buffer == NULL))
+				blk_mq_end_request(io_req->req, io_req->error);
+			else {
+				if (!blk_update_request(io_req->req, io_req->error, io_req->length))
+					__blk_mq_end_request(io_req->req, io_req->error);
+			}
 			kfree(io_req);
 		}
 	}
@@ -830,6 +839,12 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 		if(err < 0) goto error;
 		ubd_dev->cow.fd = err;
 	}
+	if (ubd_dev->no_trim == 0) {
+		ubd_dev->queue->limits.discard_granularity = UBD_SECTOR_SIZE;
+		ubd_dev->queue->limits.discard_alignment = UBD_SECTOR_SIZE;
+		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
+	}
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 	return 0;
  error:
@@ -1369,6 +1384,10 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 			}
 		}
 		break;
+	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
+		ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		spin_unlock_irq(&ubd_dev->lock);
@@ -1461,7 +1480,7 @@ static int update_bitmap(struct io_thread_req *req)
 
 	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
 			  sizeof(req->bitmap_words), req->cow_offset);
-	if(n != sizeof(req->bitmap_words))
+	if (n != sizeof(req->bitmap_words))
 		return map_error(-n);
 
 	return map_error(0);
@@ -1469,11 +1488,13 @@ static int update_bitmap(struct io_thread_req *req)
 
 static void do_io(struct io_thread_req *req)
 {
-	char *buf;
+	char *buf = NULL;
 	unsigned long len;
 	int n, nsectors, start, end, bit;
 	__u64 off;
 
+	/* FLUSH is really a special case, we cannot "case" it with others */
+
 	if (req_op(req->req) == REQ_OP_FLUSH) {
 		/* fds[0] is always either the rw image or our cow file */
 		req->error = map_error(-os_sync_file(req->fds[0]));
@@ -1493,26 +1514,42 @@ static void do_io(struct io_thread_req *req)
 		off = req->offset + req->offsets[bit] +
 			start * req->sectorsize;
 		len = (end - start) * req->sectorsize;
-		buf = &req->buffer[start * req->sectorsize];
+		if (req->buffer != NULL)
+			buf = &req->buffer[start * req->sectorsize];
 
-		if (req_op(req->req) == REQ_OP_READ) {
+		switch (req_op(req->req)) {
+		case REQ_OP_READ:
 			n = 0;
 			do {
 				buf = &buf[n];
 				len -= n;
 				n = os_pread_file(req->fds[bit], buf, len, off);
-				if(n < 0){
+				if (n < 0) {
 					req->error = map_error(-n);
 					return;
 				}
 			} while((n < len) && (n != 0));
 			if (n < len) memset(&buf[n], 0, len - n);
-		} else {
+			break;
+		case REQ_OP_WRITE:
 			n = os_pwrite_file(req->fds[bit], buf, len, off);
 			if(n != len){
 				req->error = map_error(-n);
 				return;
 			}
+			break;
+		case REQ_OP_DISCARD:
+		case REQ_OP_WRITE_ZEROES:
+			n = os_falloc_punch(req->fds[bit], off, len);
+			if (n) {
+				req->error = map_error(-n);
+				return;
+			}
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			req->error = BLK_STS_NOTSUPP;
+			return;
 		}
 
 		start = end;
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 048ae37eb5aa..ebf23012a59b 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -175,6 +175,7 @@ extern int os_fchange_dir(int fd);
 extern unsigned os_major(unsigned long long dev);
 extern unsigned os_minor(unsigned long long dev);
 extern unsigned long long os_makedev(unsigned major, unsigned minor);
+extern int os_falloc_punch(int fd, unsigned long long offset, int count);
 
 /* start_up.c */
 extern void os_early_checks(void);
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index c0197097c86e..f25b110d4e70 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -610,3 +610,13 @@ unsigned long long os_makedev(unsigned major, unsigned minor)
 {
 	return makedev(major, minor);
 }
+
+int os_falloc_punch(int fd, unsigned long long offset, int len)
+{
+	int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
+
-- 
2.11.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2018-11-14 18:31 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-13 11:59 Revised and fixed patchset anton.ivanov
2018-11-13 11:59 ` [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
2018-11-13 13:33   ` Jens Axboe
2018-11-14 15:32   ` Christoph Hellwig
2018-11-14 17:00     ` Jens Axboe
2018-11-14 17:04       ` Anton Ivanov
2018-11-13 11:59 ` [PATCH 2/4] um: Clean-up command processing in " anton.ivanov
2018-11-13 13:34   ` Jens Axboe
2018-11-14 15:28     ` Christoph Hellwig
2018-11-14 15:31       ` Anton Ivanov
2018-11-14 15:33         ` Christoph Hellwig
2018-11-13 11:59 ` [PATCH 3/4] um: Remove unsafe printks from the io thread anton.ivanov
2018-11-13 13:31   ` Jens Axboe
2018-11-13 11:59 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
2018-11-13 13:40   ` Jens Axboe
2018-11-13 14:04     ` Anton Ivanov
2018-11-13 15:18       ` Jens Axboe
2018-11-13 15:32         ` Anton Ivanov
2018-11-13 15:49           ` Jens Axboe
2018-11-13 15:52             ` Anton Ivanov
2018-11-14 15:30   ` Christoph Hellwig
2018-11-14 15:35     ` Anton Ivanov
2018-11-14  8:10 [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
2018-11-14  8:10 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
2018-11-14 17:09 [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
2018-11-14 17:09 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
2018-11-14 18:30   ` Anton Ivanov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.