All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver
@ 2018-11-14 17:09 anton.ivanov
  2018-11-14 17:09 ` [PATCH 2/4] um: Clean-up command processing in " anton.ivanov
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: anton.ivanov @ 2018-11-14 17:09 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Switch to block mq-constants for both commands, error codes and
various computations.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/ubd_kern.c | 66 ++++++++++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 28 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 28c40624bcb6..3a8a3d403ef8 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -43,11 +43,11 @@
 #include <os.h>
 #include "cow.h"
 
-enum ubd_req { UBD_READ, UBD_WRITE, UBD_FLUSH };
+/* Max request size is determined by sector mask - 32K */
+#define UBD_MAX_REQUEST (8 * sizeof(long))
 
 struct io_thread_req {
 	struct request *req;
-	enum ubd_req op;
 	int fds[2];
 	unsigned long offsets[2];
 	unsigned long long offset;
@@ -511,15 +511,13 @@ static void ubd_handler(void)
 		}
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 			struct io_thread_req *io_req = (*irq_req_buffer)[count];
-			int err = io_req->error ? BLK_STS_IOERR : BLK_STS_OK;
 
-			if (!blk_update_request(io_req->req, err, io_req->length))
-				__blk_mq_end_request(io_req->req, err);
+			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
+				__blk_mq_end_request(io_req->req, io_req->error);
 
 			kfree(io_req);
 		}
 	}
-
 	reactivate_fd(thread_fd, UBD_IRQ);
 }
 
@@ -789,7 +787,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 
 	if((fd == -ENOENT) && create_cow){
 		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
-					  ubd_dev->openflags, 1 << 9, PAGE_SIZE,
+					  ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
 					  &ubd_dev->cow.bitmap_offset,
 					  &ubd_dev->cow.bitmap_len,
 					  &ubd_dev->cow.data_offset);
@@ -830,6 +828,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 		if(err < 0) goto error;
 		ubd_dev->cow.fd = err;
 	}
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 	return 0;
  error:
 	os_close_file(ubd_dev->fd);
@@ -882,7 +881,7 @@ static int ubd_disk_register(int major, u64 size, int unit,
 	return 0;
 }
 
-#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9))
+#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
 
 static const struct blk_mq_ops ubd_mq_ops = {
 	.queue_rq = ubd_queue_rq,
@@ -1234,10 +1233,10 @@ static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
 			  __u64 bitmap_offset, unsigned long *bitmap_words,
 			  __u64 bitmap_len)
 {
-	__u64 sector = io_offset >> 9;
+	__u64 sector = io_offset >> SECTOR_SHIFT;
 	int i, update_bitmap = 0;
 
-	for(i = 0; i < length >> 9; i++){
+	for (i = 0; i < length >> SECTOR_SHIFT; i++) {
 		if(cow_mask != NULL)
 			ubd_set_bit(i, (unsigned char *) cow_mask);
 		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
@@ -1271,14 +1270,14 @@ static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
 static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
 		       __u64 bitmap_offset, __u64 bitmap_len)
 {
-	__u64 sector = req->offset >> 9;
+	__u64 sector = req->offset >> SECTOR_SHIFT;
 	int i;
 
-	if(req->length > (sizeof(req->sector_mask) * 8) << 9)
+	if (req->length > (sizeof(req->sector_mask) * 8) << SECTOR_SHIFT)
 		panic("Operation too long");
 
-	if(req->op == UBD_READ) {
-		for(i = 0; i < req->length >> 9; i++){
+	if (req_op(req->req) == REQ_OP_READ) {
+		for (i = 0; i < req->length >> SECTOR_SHIFT; i++) {
 			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
 				ubd_set_bit(i, (unsigned char *)
 					    &req->sector_mask);
@@ -1307,19 +1306,16 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
 		io_req->fds[0] = dev->fd;
 	io_req->error = 0;
 
-	if (req_op(req) == REQ_OP_FLUSH) {
-		io_req->op = UBD_FLUSH;
-	} else {
+	if (req_op(req) != REQ_OP_FLUSH) {
 		io_req->fds[1] = dev->fd;
 		io_req->cow_offset = -1;
 		io_req->offset = off;
 		io_req->length = bvec->bv_len;
 		io_req->sector_mask = 0;
-		io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE;
 		io_req->offsets[0] = 0;
 		io_req->offsets[1] = dev->cow.data_offset;
 		io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
-		io_req->sectorsize = 1 << 9;
+		io_req->sectorsize = SECTOR_SIZE;
 
 		if (dev->cow.file) {
 			cowify_req(io_req, dev->cow.bitmap,
@@ -1353,7 +1349,7 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 	} else {
 		struct req_iterator iter;
 		struct bio_vec bvec;
-		u64 off = (u64)blk_rq_pos(req) << 9;
+		u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT;
 
 		rq_for_each_segment(bvec, req, iter) {
 			ret = ubd_queue_one_vec(hctx, req, off, &bvec);
@@ -1413,22 +1409,36 @@ static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
 	return -EINVAL;
 }
 
+static int map_error(int error_code)
+{
+	switch (error_code) {
+	case 0:
+		return BLK_STS_OK;
+	case ENOSYS:
+	case EOPNOTSUPP:
+		return BLK_STS_NOTSUPP;
+	case ENOSPC:
+		return BLK_STS_NOSPC;
+	}
+	return BLK_STS_IOERR;
+}
+
 static int update_bitmap(struct io_thread_req *req)
 {
 	int n;
 
 	if(req->cow_offset == -1)
-		return 0;
+		return map_error(0);
 
 	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
 			  sizeof(req->bitmap_words), req->cow_offset);
 	if(n != sizeof(req->bitmap_words)){
 		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
 		       req->fds[1]);
-		return 1;
+		return map_error(-n);
 	}
 
-	return 0;
+	return map_error(0);
 }
 
 static void do_io(struct io_thread_req *req)
@@ -1438,13 +1448,13 @@ static void do_io(struct io_thread_req *req)
 	int n, nsectors, start, end, bit;
 	__u64 off;
 
-	if (req->op == UBD_FLUSH) {
+	if (req_op(req->req) == REQ_OP_FLUSH) {
 		/* fds[0] is always either the rw image or our cow file */
 		n = os_sync_file(req->fds[0]);
 		if (n != 0) {
 			printk("do_io - sync failed err = %d "
 			       "fd = %d\n", -n, req->fds[0]);
-			req->error = 1;
+			req->error = map_error(-n);
 		}
 		return;
 	}
@@ -1464,7 +1474,7 @@ static void do_io(struct io_thread_req *req)
 		len = (end - start) * req->sectorsize;
 		buf = &req->buffer[start * req->sectorsize];
 
-		if(req->op == UBD_READ){
+		if (req_op(req->req) == REQ_OP_READ) {
 			n = 0;
 			do {
 				buf = &buf[n];
@@ -1473,7 +1483,7 @@ static void do_io(struct io_thread_req *req)
 				if (n < 0) {
 					printk("do_io - read failed, err = %d "
 					       "fd = %d\n", -n, req->fds[bit]);
-					req->error = 1;
+					req->error = map_error(-n);
 					return;
 				}
 			} while((n < len) && (n != 0));
@@ -1483,7 +1493,7 @@ static void do_io(struct io_thread_req *req)
 			if(n != len){
 				printk("do_io - write failed err = %d "
 				       "fd = %d\n", -n, req->fds[bit]);
-				req->error = 1;
+				req->error = map_error(-n);
 				return;
 			}
 		}
-- 
2.11.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/4] um: Clean-up command processing in UML UBD driver
  2018-11-14 17:09 [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
@ 2018-11-14 17:09 ` anton.ivanov
  2018-11-14 17:09 ` [PATCH 3/4] um: Remove unsafe printks from the io thread anton.ivanov
  2018-11-14 17:09 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
  2 siblings, 0 replies; 15+ messages in thread
From: anton.ivanov @ 2018-11-14 17:09 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Clean-up command processing and return BLK_STS_NOTSUP for
uknown commands.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/ubd_kern.c | 77 ++++++++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 30 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 3a8a3d403ef8..58860ef6931a 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1306,65 +1306,82 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
 		io_req->fds[0] = dev->fd;
 	io_req->error = 0;
 
-	if (req_op(req) != REQ_OP_FLUSH) {
-		io_req->fds[1] = dev->fd;
-		io_req->cow_offset = -1;
-		io_req->offset = off;
-		io_req->length = bvec->bv_len;
-		io_req->sector_mask = 0;
-		io_req->offsets[0] = 0;
-		io_req->offsets[1] = dev->cow.data_offset;
+	if (bvec != NULL) {
 		io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
-		io_req->sectorsize = SECTOR_SIZE;
-
-		if (dev->cow.file) {
-			cowify_req(io_req, dev->cow.bitmap,
-				   dev->cow.bitmap_offset, dev->cow.bitmap_len);
-		}
+		io_req->length = bvec->bv_len;
+	} else {
+		io_req->buffer = NULL;
+		io_req->length = blk_rq_bytes(req);
 	}
 
+	io_req->sectorsize = SECTOR_SIZE;
+	io_req->fds[1] = dev->fd;
+	io_req->cow_offset = -1;
+	io_req->offset = off;
+	io_req->sector_mask = 0;
+	io_req->offsets[0] = 0;
+	io_req->offsets[1] = dev->cow.data_offset;
+
+	if (dev->cow.file)
+		cowify_req(io_req, dev->cow.bitmap,
+			   dev->cow.bitmap_offset, dev->cow.bitmap_len);
+
 	ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
 	if (ret != sizeof(io_req)) {
 		if (ret != -EAGAIN)
 			pr_err("write to io thread failed: %d\n", -ret);
 		kfree(io_req);
 	}
-
 	return ret;
 }
 
+static int queue_rw_req(struct blk_mq_hw_ctx *hctx, struct request *req)
+{
+	struct req_iterator iter;
+	struct bio_vec bvec;
+	int ret;
+	u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT;
+
+	rq_for_each_segment(bvec, req, iter) {
+		ret = ubd_queue_one_vec(hctx, req, off, &bvec);
+		if (ret < 0)
+			return ret;
+		off += bvec.bv_len;
+	}
+	return 0;
+}
+
 static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 				 const struct blk_mq_queue_data *bd)
 {
 	struct ubd *ubd_dev = hctx->queue->queuedata;
 	struct request *req = bd->rq;
-	int ret = 0;
+	int ret = 0, res = BLK_STS_OK;
 
 	blk_mq_start_request(req);
 
 	spin_lock_irq(&ubd_dev->lock);
 
-	if (req_op(req) == REQ_OP_FLUSH) {
+	switch (req_op(req)) {
+	/* operations with no lentgth/offset arguments */
+	case REQ_OP_FLUSH:
 		ret = ubd_queue_one_vec(hctx, req, 0, NULL);
-	} else {
-		struct req_iterator iter;
-		struct bio_vec bvec;
-		u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT;
-
-		rq_for_each_segment(bvec, req, iter) {
-			ret = ubd_queue_one_vec(hctx, req, off, &bvec);
-			if (ret < 0)
-				goto out;
-			off += bvec.bv_len;
-		}
+		break;
+	case REQ_OP_READ:
+	case REQ_OP_WRITE:
+		ret = queue_rw_req(hctx, req);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		res = BLK_STS_NOTSUPP;
 	}
-out:
+
 	spin_unlock_irq(&ubd_dev->lock);
 
 	if (ret < 0)
 		blk_mq_requeue_request(req, true);
 
-	return BLK_STS_OK;
+	return res;
 }
 
 static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-- 
2.11.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/4] um: Remove unsafe printks from the io thread
  2018-11-14 17:09 [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
  2018-11-14 17:09 ` [PATCH 2/4] um: Clean-up command processing in " anton.ivanov
@ 2018-11-14 17:09 ` anton.ivanov
  2018-11-14 17:09 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
  2 siblings, 0 replies; 15+ messages in thread
From: anton.ivanov @ 2018-11-14 17:09 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Printk out of the io thread has been proven to be unsafe. This
is not surprising as the thread is part of the UML hypervisor
code. It is not supposed to invoke any kernel code/resources.

It is necesssary to pass the error to the block IO layer and let it

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/ubd_kern.c | 42 +++++++++++++++++-------------------------
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 58860ef6931a..1672e3c49bfb 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2018 Cambridge Greys Ltd
  * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
@@ -1440,6 +1441,19 @@ static int map_error(int error_code)
 	return BLK_STS_IOERR;
 }
 
+/*
+ * Everything from here onwards *IS NOT PART OF THE KERNEL*
+ *
+ * The following functions are part of UML hypervisor code.
+ * All functions from here onwards are executed as a helper
+ * thread and are not allowed to execute any kernel functions.
+ *
+ * Any communication must occur strictly via shared memory and IPC.
+ *
+ * Do not add printks, locks, kernel memory operations, etc - it
+ * will result in unpredictable behaviour and/or crashes.
+ */
+
 static int update_bitmap(struct io_thread_req *req)
 {
 	int n;
@@ -1449,11 +1463,8 @@ static int update_bitmap(struct io_thread_req *req)
 
 	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
 			  sizeof(req->bitmap_words), req->cow_offset);
-	if(n != sizeof(req->bitmap_words)){
-		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
-		       req->fds[1]);
+	if(n != sizeof(req->bitmap_words))
 		return map_error(-n);
-	}
 
 	return map_error(0);
 }
@@ -1467,12 +1478,7 @@ static void do_io(struct io_thread_req *req)
 
 	if (req_op(req->req) == REQ_OP_FLUSH) {
 		/* fds[0] is always either the rw image or our cow file */
-		n = os_sync_file(req->fds[0]);
-		if (n != 0) {
-			printk("do_io - sync failed err = %d "
-			       "fd = %d\n", -n, req->fds[0]);
-			req->error = map_error(-n);
-		}
+		req->error = map_error(-os_sync_file(req->fds[0]));
 		return;
 	}
 
@@ -1497,9 +1503,7 @@ static void do_io(struct io_thread_req *req)
 				buf = &buf[n];
 				len -= n;
 				n = os_pread_file(req->fds[bit], buf, len, off);
-				if (n < 0) {
-					printk("do_io - read failed, err = %d "
-					       "fd = %d\n", -n, req->fds[bit]);
+				if(n < 0){
 					req->error = map_error(-n);
 					return;
 				}
@@ -1508,8 +1512,6 @@ static void do_io(struct io_thread_req *req)
 		} else {
 			n = os_pwrite_file(req->fds[bit], buf, len, off);
 			if(n != len){
-				printk("do_io - write failed err = %d "
-				       "fd = %d\n", -n, req->fds[bit]);
 				req->error = map_error(-n);
 				return;
 			}
@@ -1547,11 +1549,6 @@ int io_thread(void *arg)
 			if (n == -EAGAIN) {
 				ubd_read_poll(-1);
 				continue;
-			} else {
-				printk("io_thread - read failed, fd = %d, "
-				       "err = %d,"
-				       "reminder = %d\n",
-				       kernel_fd, -n, io_remainder_size);
 			}
 		}
 
@@ -1566,11 +1563,6 @@ int io_thread(void *arg)
 			res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
 			if (res >= 0) {
 				written += res;
-			} else {
-				if (res != -EAGAIN) {
-					printk("io_thread - write failed, fd = %d, "
-					       "err = %d\n", kernel_fd, -n);
-				}
 			}
 			if (written < n) {
 				ubd_write_poll(-1);
-- 
2.11.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-14 17:09 [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
  2018-11-14 17:09 ` [PATCH 2/4] um: Clean-up command processing in " anton.ivanov
  2018-11-14 17:09 ` [PATCH 3/4] um: Remove unsafe printks from the io thread anton.ivanov
@ 2018-11-14 17:09 ` anton.ivanov
  2018-11-14 18:30   ` Anton Ivanov
  2 siblings, 1 reply; 15+ messages in thread
From: anton.ivanov @ 2018-11-14 17:09 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Support for DISCARD and WRITE_ZEROES in the ubd driver using
fallocate.

DISCARD is enabled by default and can be disabled using a new
UBD command line flag.

If the underlying fs on which the UBD image is stored does not
support DISCARD the support for both DISCARD and WRITE_ZEROES
is turned off.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/ubd_kern.c  | 66 +++++++++++++++++++++++++++++++++++++--------
 arch/um/include/shared/os.h |  1 +
 arch/um/os-Linux/file.c     | 10 +++++++
 3 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 1672e3c49bfb..e85c7f7fda6f 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -154,6 +154,7 @@ struct ubd {
 	struct openflags openflags;
 	unsigned shared:1;
 	unsigned no_cow:1;
+	unsigned no_trim:1;
 	struct cow cow;
 	struct platform_device pdev;
 	struct request_queue *queue;
@@ -177,6 +178,7 @@ struct ubd {
 	.boot_openflags =	OPEN_FLAGS, \
 	.openflags =		OPEN_FLAGS, \
 	.no_cow =               0, \
+	.no_trim =		0, \
 	.shared =		0, \
 	.cow =			DEFAULT_COW, \
 	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
@@ -323,7 +325,7 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
 		*index_out = n;
 
 	err = -EINVAL;
-	for (i = 0; i < sizeof("rscd="); i++) {
+	for (i = 0; i < sizeof("rscdt="); i++) {
 		switch (*str) {
 		case 'r':
 			flags.w = 0;
@@ -337,12 +339,15 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
 		case 'c':
 			ubd_dev->shared = 1;
 			break;
+		case 't':
+			ubd_dev->no_trim = 1;
+			break;
 		case '=':
 			str++;
 			goto break_loop;
 		default:
 			*error_out = "Expected '=' or flag letter "
-				"(r, s, c, or d)";
+				"(r, s, c, t or d)";
 			goto out;
 		}
 		str++;
@@ -415,6 +420,7 @@ __uml_help(ubd_setup,
 "    'c' will cause the device to be treated as being shared between multiple\n"
 "    UMLs and file locking will be turned off - this is appropriate for a\n"
 "    cluster filesystem and inappropriate at almost all other times.\n\n"
+"    't' will disable trim/discard support on the device (enabled by default).\n\n"
 );
 
 static int udb_setup(char *str)
@@ -513,9 +519,17 @@ static void ubd_handler(void)
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 			struct io_thread_req *io_req = (*irq_req_buffer)[count];
 
-			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
-				__blk_mq_end_request(io_req->req, io_req->error);
-
+			if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
+				blk_queue_max_discard_sectors(io_req->req->q, 0);
+				blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
+				blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
+			}
+			if ((io_req->error) || (io_req->buffer == NULL))
+				blk_mq_end_request(io_req->req, io_req->error);
+			else {
+				if (!blk_update_request(io_req->req, io_req->error, io_req->length))
+					__blk_mq_end_request(io_req->req, io_req->error);
+			}
 			kfree(io_req);
 		}
 	}
@@ -829,6 +843,14 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 		if(err < 0) goto error;
 		ubd_dev->cow.fd = err;
 	}
+	if (ubd_dev->no_trim == 0) {
+		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
+		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
+		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
+		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
+		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
+	}
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 	return 0;
  error:
@@ -1372,6 +1394,10 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 	case REQ_OP_WRITE:
 		ret = queue_rw_req(hctx, req);
 		break;
+	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
+		ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		res = BLK_STS_NOTSUPP;
@@ -1463,7 +1489,7 @@ static int update_bitmap(struct io_thread_req *req)
 
 	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
 			  sizeof(req->bitmap_words), req->cow_offset);
-	if(n != sizeof(req->bitmap_words))
+	if (n != sizeof(req->bitmap_words))
 		return map_error(-n);
 
 	return map_error(0);
@@ -1471,11 +1497,13 @@ static int update_bitmap(struct io_thread_req *req)
 
 static void do_io(struct io_thread_req *req)
 {
-	char *buf;
+	char *buf = NULL;
 	unsigned long len;
 	int n, nsectors, start, end, bit;
 	__u64 off;
 
+	/* FLUSH is really a special case, we cannot "case" it with others */
+
 	if (req_op(req->req) == REQ_OP_FLUSH) {
 		/* fds[0] is always either the rw image or our cow file */
 		req->error = map_error(-os_sync_file(req->fds[0]));
@@ -1495,26 +1523,42 @@ static void do_io(struct io_thread_req *req)
 		off = req->offset + req->offsets[bit] +
 			start * req->sectorsize;
 		len = (end - start) * req->sectorsize;
-		buf = &req->buffer[start * req->sectorsize];
+		if (req->buffer != NULL)
+			buf = &req->buffer[start * req->sectorsize];
 
-		if (req_op(req->req) == REQ_OP_READ) {
+		switch (req_op(req->req)) {
+		case REQ_OP_READ:
 			n = 0;
 			do {
 				buf = &buf[n];
 				len -= n;
 				n = os_pread_file(req->fds[bit], buf, len, off);
-				if(n < 0){
+				if (n < 0) {
 					req->error = map_error(-n);
 					return;
 				}
 			} while((n < len) && (n != 0));
 			if (n < len) memset(&buf[n], 0, len - n);
-		} else {
+			break;
+		case REQ_OP_WRITE:
 			n = os_pwrite_file(req->fds[bit], buf, len, off);
 			if(n != len){
 				req->error = map_error(-n);
 				return;
 			}
+			break;
+		case REQ_OP_DISCARD:
+		case REQ_OP_WRITE_ZEROES:
+			n = os_falloc_punch(req->fds[bit], off, len);
+			if (n) {
+				req->error = map_error(-n);
+				return;
+			}
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			req->error = BLK_STS_NOTSUPP;
+			return;
 		}
 
 		start = end;
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 048ae37eb5aa..ebf23012a59b 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -175,6 +175,7 @@ extern int os_fchange_dir(int fd);
 extern unsigned os_major(unsigned long long dev);
 extern unsigned os_minor(unsigned long long dev);
 extern unsigned long long os_makedev(unsigned major, unsigned minor);
+extern int os_falloc_punch(int fd, unsigned long long offset, int count);
 
 /* start_up.c */
 extern void os_early_checks(void);
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index c0197097c86e..f25b110d4e70 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -610,3 +610,13 @@ unsigned long long os_makedev(unsigned major, unsigned minor)
 {
 	return makedev(major, minor);
 }
+
+int os_falloc_punch(int fd, unsigned long long offset, int len)
+{
+	int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
+
-- 
2.11.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-14 17:09 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
@ 2018-11-14 18:30   ` Anton Ivanov
  0 siblings, 0 replies; 15+ messages in thread
From: Anton Ivanov @ 2018-11-14 18:30 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch


On 11/14/18 5:09 PM, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>
> Support for DISCARD and WRITE_ZEROES in the ubd driver using
> fallocate.
>
> DISCARD is enabled by default and can be disabled using a new
> UBD command line flag.
>
> If the underlying fs on which the UBD image is stored does not
> support DISCARD the support for both DISCARD and WRITE_ZEROES
> is turned off.
>
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>   arch/um/drivers/ubd_kern.c  | 66 +++++++++++++++++++++++++++++++++++++--------
>   arch/um/include/shared/os.h |  1 +
>   arch/um/os-Linux/file.c     | 10 +++++++
>   3 files changed, 66 insertions(+), 11 deletions(-)
>
> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
> index 1672e3c49bfb..e85c7f7fda6f 100644
> --- a/arch/um/drivers/ubd_kern.c
> +++ b/arch/um/drivers/ubd_kern.c
> @@ -154,6 +154,7 @@ struct ubd {
>   	struct openflags openflags;
>   	unsigned shared:1;
>   	unsigned no_cow:1;
> +	unsigned no_trim:1;
>   	struct cow cow;
>   	struct platform_device pdev;
>   	struct request_queue *queue;
> @@ -177,6 +178,7 @@ struct ubd {
>   	.boot_openflags =	OPEN_FLAGS, \
>   	.openflags =		OPEN_FLAGS, \
>   	.no_cow =               0, \
> +	.no_trim =		0, \
>   	.shared =		0, \
>   	.cow =			DEFAULT_COW, \
>   	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
> @@ -323,7 +325,7 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
>   		*index_out = n;
>   
>   	err = -EINVAL;
> -	for (i = 0; i < sizeof("rscd="); i++) {
> +	for (i = 0; i < sizeof("rscdt="); i++) {
>   		switch (*str) {
>   		case 'r':
>   			flags.w = 0;
> @@ -337,12 +339,15 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
>   		case 'c':
>   			ubd_dev->shared = 1;
>   			break;
> +		case 't':
> +			ubd_dev->no_trim = 1;
> +			break;
>   		case '=':
>   			str++;
>   			goto break_loop;
>   		default:
>   			*error_out = "Expected '=' or flag letter "
> -				"(r, s, c, or d)";
> +				"(r, s, c, t or d)";
>   			goto out;
>   		}
>   		str++;
> @@ -415,6 +420,7 @@ __uml_help(ubd_setup,
>   "    'c' will cause the device to be treated as being shared between multiple\n"
>   "    UMLs and file locking will be turned off - this is appropriate for a\n"
>   "    cluster filesystem and inappropriate at almost all other times.\n\n"
> +"    't' will disable trim/discard support on the device (enabled by default).\n\n"
>   );
>   
>   static int udb_setup(char *str)
> @@ -513,9 +519,17 @@ static void ubd_handler(void)
>   		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>   			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>   
> -			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
> -				__blk_mq_end_request(io_req->req, io_req->error);
> -
> +			if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
> +				blk_queue_max_discard_sectors(io_req->req->q, 0);
> +				blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
> +				blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
> +			}
> +			if ((io_req->error) || (io_req->buffer == NULL))
> +				blk_mq_end_request(io_req->req, io_req->error);
> +			else {
> +				if (!blk_update_request(io_req->req, io_req->error, io_req->length))
> +					__blk_mq_end_request(io_req->req, io_req->error);
> +			}
>   			kfree(io_req);
>   		}
>   	}
> @@ -829,6 +843,14 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>   		if(err < 0) goto error;
>   		ubd_dev->cow.fd = err;
>   	}
> +	if (ubd_dev->no_trim == 0) {
> +		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
> +		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
> +		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
> +		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
> +		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
> +		blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);


Sorry, messed it up when reapplying,  this should have set the zero 
limits as well, not repeat the discard ones.

A revised version will be resent shortly.

A



> +	}
>   	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
>   	return 0;
>    error:
> @@ -1372,6 +1394,10 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
>   	case REQ_OP_WRITE:
>   		ret = queue_rw_req(hctx, req);
>   		break;
> +	case REQ_OP_DISCARD:
> +	case REQ_OP_WRITE_ZEROES:
> +		ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
> +		break;
>   	default:
>   		WARN_ON_ONCE(1);
>   		res = BLK_STS_NOTSUPP;
> @@ -1463,7 +1489,7 @@ static int update_bitmap(struct io_thread_req *req)
>   
>   	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
>   			  sizeof(req->bitmap_words), req->cow_offset);
> -	if(n != sizeof(req->bitmap_words))
> +	if (n != sizeof(req->bitmap_words))
>   		return map_error(-n);
>   
>   	return map_error(0);
> @@ -1471,11 +1497,13 @@ static int update_bitmap(struct io_thread_req *req)
>   
>   static void do_io(struct io_thread_req *req)
>   {
> -	char *buf;
> +	char *buf = NULL;
>   	unsigned long len;
>   	int n, nsectors, start, end, bit;
>   	__u64 off;
>   
> +	/* FLUSH is really a special case, we cannot "case" it with others */
> +
>   	if (req_op(req->req) == REQ_OP_FLUSH) {
>   		/* fds[0] is always either the rw image or our cow file */
>   		req->error = map_error(-os_sync_file(req->fds[0]));
> @@ -1495,26 +1523,42 @@ static void do_io(struct io_thread_req *req)
>   		off = req->offset + req->offsets[bit] +
>   			start * req->sectorsize;
>   		len = (end - start) * req->sectorsize;
> -		buf = &req->buffer[start * req->sectorsize];
> +		if (req->buffer != NULL)
> +			buf = &req->buffer[start * req->sectorsize];
>   
> -		if (req_op(req->req) == REQ_OP_READ) {
> +		switch (req_op(req->req)) {
> +		case REQ_OP_READ:
>   			n = 0;
>   			do {
>   				buf = &buf[n];
>   				len -= n;
>   				n = os_pread_file(req->fds[bit], buf, len, off);
> -				if(n < 0){
> +				if (n < 0) {
>   					req->error = map_error(-n);
>   					return;
>   				}
>   			} while((n < len) && (n != 0));
>   			if (n < len) memset(&buf[n], 0, len - n);
> -		} else {
> +			break;
> +		case REQ_OP_WRITE:
>   			n = os_pwrite_file(req->fds[bit], buf, len, off);
>   			if(n != len){
>   				req->error = map_error(-n);
>   				return;
>   			}
> +			break;
> +		case REQ_OP_DISCARD:
> +		case REQ_OP_WRITE_ZEROES:
> +			n = os_falloc_punch(req->fds[bit], off, len);
> +			if (n) {
> +				req->error = map_error(-n);
> +				return;
> +			}
> +			break;
> +		default:
> +			WARN_ON_ONCE(1);
> +			req->error = BLK_STS_NOTSUPP;
> +			return;
>   		}
>   
>   		start = end;
> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
> index 048ae37eb5aa..ebf23012a59b 100644
> --- a/arch/um/include/shared/os.h
> +++ b/arch/um/include/shared/os.h
> @@ -175,6 +175,7 @@ extern int os_fchange_dir(int fd);
>   extern unsigned os_major(unsigned long long dev);
>   extern unsigned os_minor(unsigned long long dev);
>   extern unsigned long long os_makedev(unsigned major, unsigned minor);
> +extern int os_falloc_punch(int fd, unsigned long long offset, int count);
>   
>   /* start_up.c */
>   extern void os_early_checks(void);
> diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
> index c0197097c86e..f25b110d4e70 100644
> --- a/arch/um/os-Linux/file.c
> +++ b/arch/um/os-Linux/file.c
> @@ -610,3 +610,13 @@ unsigned long long os_makedev(unsigned major, unsigned minor)
>   {
>   	return makedev(major, minor);
>   }
> +
> +int os_falloc_punch(int fd, unsigned long long offset, int len)
> +{
> +	int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len);
> +
> +	if (n < 0)
> +		return -errno;
> +	return n;
> +}
> +

-- 
Anton R. Ivanov

Cambridge Greys Limited, England and Wales company No 10273661
http://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-14 15:30   ` Christoph Hellwig
@ 2018-11-14 15:35     ` Anton Ivanov
  0 siblings, 0 replies; 15+ messages in thread
From: Anton Ivanov @ 2018-11-14 15:35 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: axboe, richard, linux-um

On 14/11/2018 15:30, Christoph Hellwig wrote:
> Discards are defined to be hints.  So if your fileystems return
> -EOPTNOTSUPP you just stop passing them to the file system and
> complete them from ->queue_rq.
Ack.
>
> Write Zeroes isn't.  So if you really want to support it you
> should manually write zeroes to the file as a fallback.

Hmm... Loop treats them as the same thing. I was looking at what they 
have done.

>   But do you
> have a real use case for Write Zeroes to start with?

I am not aware of.

There is a real case just for trim for now and I was using loop and nbd 
as a "guide" on what needs to be done. Loop has both DISCARD and ZERO.

Several other ops in the API looked interesting as well, but I have not 
seen them implemented anywhere so I skipped them for now.


-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 11:59 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
  2018-11-13 13:40   ` Jens Axboe
@ 2018-11-14 15:30   ` Christoph Hellwig
  2018-11-14 15:35     ` Anton Ivanov
  1 sibling, 1 reply; 15+ messages in thread
From: Christoph Hellwig @ 2018-11-14 15:30 UTC (permalink / raw)
  To: anton.ivanov; +Cc: axboe, richard, linux-um, hch

Discards are defined to be hints.  So if your fileystems return
-EOPTNOTSUPP you just stop passing them to the file system and
complete them from ->queue_rq.

Write Zeroes isn't.  So if you really want to support it you
should manually write zeroes to the file as a fallback.  But do you
have a real use case for Write Zeroes to start with?

_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-14  8:10 [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
@ 2018-11-14  8:10 ` anton.ivanov
  0 siblings, 0 replies; 15+ messages in thread
From: anton.ivanov @ 2018-11-14  8:10 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Support for DISCARD and WRITE_ZEROES in the ubd driver using
fallocate.

DISCARD is enabled by default and can be disabled using a new
UBD command line flag.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/ubd_kern.c  | 59 ++++++++++++++++++++++++++++++++++++---------
 arch/um/include/shared/os.h |  1 +
 arch/um/os-Linux/file.c     | 10 ++++++++
 3 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index c08d162f5dc9..8e1179143df6 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -155,6 +155,7 @@ struct ubd {
 	struct openflags openflags;
 	unsigned shared:1;
 	unsigned no_cow:1;
+	unsigned no_trim:1;
 	struct cow cow;
 	struct platform_device pdev;
 	struct request_queue *queue;
@@ -178,6 +179,7 @@ struct ubd {
 	.boot_openflags =	OPEN_FLAGS, \
 	.openflags =		OPEN_FLAGS, \
 	.no_cow =               0, \
+	.no_trim =		0, \
 	.shared =		0, \
 	.cow =			DEFAULT_COW, \
 	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
@@ -324,7 +326,7 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
 		*index_out = n;
 
 	err = -EINVAL;
-	for (i = 0; i < sizeof("rscd="); i++) {
+	for (i = 0; i < sizeof("rscdt="); i++) {
 		switch (*str) {
 		case 'r':
 			flags.w = 0;
@@ -338,12 +340,15 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
 		case 'c':
 			ubd_dev->shared = 1;
 			break;
+		case 't':
+			ubd_dev->no_trim = 1;
+			break;
 		case '=':
 			str++;
 			goto break_loop;
 		default:
 			*error_out = "Expected '=' or flag letter "
-				"(r, s, c, or d)";
+				"(r, s, c, t or d)";
 			goto out;
 		}
 		str++;
@@ -416,6 +421,7 @@ __uml_help(ubd_setup,
 "    'c' will cause the device to be treated as being shared between multiple\n"
 "    UMLs and file locking will be turned off - this is appropriate for a\n"
 "    cluster filesystem and inappropriate at almost all other times.\n\n"
+"    't' will disable trim/discard support on the device (enabled by default).\n\n"
 );
 
 static int udb_setup(char *str)
@@ -514,9 +520,12 @@ static void ubd_handler(void)
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 			struct io_thread_req *io_req = (*irq_req_buffer)[count];
 
-			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
-				__blk_mq_end_request(io_req->req, io_req->error);
-
+			if ((io_req->error) || (io_req->buffer == NULL))
+				blk_mq_end_request(io_req->req, io_req->error);
+			else {
+				if (!blk_update_request(io_req->req, io_req->error, io_req->length))
+					__blk_mq_end_request(io_req->req, io_req->error);
+			}
 			kfree(io_req);
 		}
 	}
@@ -830,6 +839,12 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 		if(err < 0) goto error;
 		ubd_dev->cow.fd = err;
 	}
+	if (ubd_dev->no_trim == 0) {
+		ubd_dev->queue->limits.discard_granularity = UBD_SECTOR_SIZE;
+		ubd_dev->queue->limits.discard_alignment = UBD_SECTOR_SIZE;
+		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
+	}
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 	return 0;
  error:
@@ -1369,6 +1384,10 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 			}
 		}
 		break;
+	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
+		ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		spin_unlock_irq(&ubd_dev->lock);
@@ -1461,7 +1480,7 @@ static int update_bitmap(struct io_thread_req *req)
 
 	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
 			  sizeof(req->bitmap_words), req->cow_offset);
-	if(n != sizeof(req->bitmap_words))
+	if (n != sizeof(req->bitmap_words))
 		return map_error(-n);
 
 	return map_error(0);
@@ -1469,11 +1488,13 @@ static int update_bitmap(struct io_thread_req *req)
 
 static void do_io(struct io_thread_req *req)
 {
-	char *buf;
+	char *buf = NULL;
 	unsigned long len;
 	int n, nsectors, start, end, bit;
 	__u64 off;
 
+	/* FLUSH is really a special case, we cannot "case" it with others */
+
 	if (req_op(req->req) == REQ_OP_FLUSH) {
 		/* fds[0] is always either the rw image or our cow file */
 		req->error = map_error(-os_sync_file(req->fds[0]));
@@ -1493,26 +1514,42 @@ static void do_io(struct io_thread_req *req)
 		off = req->offset + req->offsets[bit] +
 			start * req->sectorsize;
 		len = (end - start) * req->sectorsize;
-		buf = &req->buffer[start * req->sectorsize];
+		if (req->buffer != NULL)
+			buf = &req->buffer[start * req->sectorsize];
 
-		if (req_op(req->req) == REQ_OP_READ) {
+		switch (req_op(req->req)) {
+		case REQ_OP_READ:
 			n = 0;
 			do {
 				buf = &buf[n];
 				len -= n;
 				n = os_pread_file(req->fds[bit], buf, len, off);
-				if(n < 0){
+				if (n < 0) {
 					req->error = map_error(-n);
 					return;
 				}
 			} while((n < len) && (n != 0));
 			if (n < len) memset(&buf[n], 0, len - n);
-		} else {
+			break;
+		case REQ_OP_WRITE:
 			n = os_pwrite_file(req->fds[bit], buf, len, off);
 			if(n != len){
 				req->error = map_error(-n);
 				return;
 			}
+			break;
+		case REQ_OP_DISCARD:
+		case REQ_OP_WRITE_ZEROES:
+			n = os_falloc_punch(req->fds[bit], off, len);
+			if (n) {
+				req->error = map_error(-n);
+				return;
+			}
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			req->error = BLK_STS_NOTSUPP;
+			return;
 		}
 
 		start = end;
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 048ae37eb5aa..ebf23012a59b 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -175,6 +175,7 @@ extern int os_fchange_dir(int fd);
 extern unsigned os_major(unsigned long long dev);
 extern unsigned os_minor(unsigned long long dev);
 extern unsigned long long os_makedev(unsigned major, unsigned minor);
+extern int os_falloc_punch(int fd, unsigned long long offset, int count);
 
 /* start_up.c */
 extern void os_early_checks(void);
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index c0197097c86e..f25b110d4e70 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -610,3 +610,13 @@ unsigned long long os_makedev(unsigned major, unsigned minor)
 {
 	return makedev(major, minor);
 }
+
+int os_falloc_punch(int fd, unsigned long long offset, int len)
+{
+	int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
+
-- 
2.11.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 15:49           ` Jens Axboe
@ 2018-11-13 15:52             ` Anton Ivanov
  0 siblings, 0 replies; 15+ messages in thread
From: Anton Ivanov @ 2018-11-13 15:52 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch

On 13/11/2018 15:49, Jens Axboe wrote:
> On 11/13/18 8:32 AM, Anton Ivanov wrote:
>> On 11/13/18 3:18 PM, Jens Axboe wrote:
>>> On 11/13/18 7:04 AM, Anton Ivanov wrote:
>>>> On 13/11/2018 13:40, Jens Axboe wrote:
>>>>> On 11/13/18 4:59 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>>> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
>>>>>> index 2b1aaf7755aa..e817fd4a4c30 100644
>>>>>> --- a/arch/um/drivers/Kconfig
>>>>>> +++ b/arch/um/drivers/Kconfig
>>>>>> @@ -1,4 +1,25 @@
>>>>>>     # SPDX-License-Identifier: GPL-2.0
>>>>>> +menu "UML Block Devices"
>>>>>> +
>>>>>> +config BLK_DEV_UBD
>>>>>> +	bool "UBD Block Device"
>>>>>> +	default y
>>>>>> +	help
>>>>>> +          User Mode Linux virtual block device driver
>>>>>> +
>>>>>> +config BLK_DEV_UBD_SYNC
>>>>>> +	bool "Use Synchronous mode for UBD"
>>>>>> +	default n
>>>>>> +	help
>>>>>> +          Perform all disk operations synchronously (extremely slow).
>>>>>> +
>>>>>> +config BLK_DEV_UBD_DISCARD
>>>>>> +	bool "Enable DISCARD/TRIM support in UBD"
>>>>>> +	default y
>>>>>> +	help
>>>>>> +          Enable discard/trim support. Requires host kernel 3.x or above and
>>>>>> +	  may not be supported on all host filesystems.
>>>>>> +endmenu
>>>>> kconfig entries are horrible ways to manage this. A module parameter
>>>>> would be better. The latter also gets rid of the various ifdefs.
>>>> I completely agree with you. Unfortunately, UBD for a number of reasons
>>>> cannot be a module at present. It contains "host" side and loading
>>>> something from the "inside" which breaks the hypervisor boundary is a
>>>> definitive no go.
>>>>
>>>> In order to implement the suggestion we need the concept of loadable
>>>> host side modules and while that is possible the likelihood of someone
>>>> doing all the work on this one at present is rather low.
>>> Module parameters work for builtin as well, then you just add
>>> ubd.discard=true to the kernel command line.
>>>
>>>>>> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
>>>>>> index e78f27f5ce88..4fd4cb68f033 100644
>>>>>> --- a/arch/um/drivers/ubd_kern.c
>>>>>> +++ b/arch/um/drivers/ubd_kern.c
>>>>>> @@ -513,9 +513,13 @@ static void ubd_handler(void)
>>>>>>     		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>>>>>>     			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>>>>>>     
>>>>>> -			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
>>>>>> -				__blk_mq_end_request(io_req->req, io_req->error);
>>>>>> -
>>>>>> +			if ((io_req->error) || (io_req->buffer == NULL)) {
>>>>>> +				blk_mq_end_request(io_req->req, io_req->error);
>>>>>> +			} else {
>>>>>> +				if (!blk_update_request(io_req->req, io_req->error, io_req->length)) {
>>>>>> +					__blk_mq_end_request(io_req->req, io_req->error);
>>>>>> +				}
>>>>>> +			}
>>>>>>     			kfree(io_req);
>>>>> This looks odd, why is this change necessary? Also a lot of unneeded
>>>>> parentheses.
>>>> Loop and NBD do it that way. I can retest with the original code now
>>>> that the underlying culprit is known and resubmit if that works.
>>> I missed the __ part, yes it actually looks fine. blk_mq_end_request() will
>>> both end the bio parts and free the request, your latter part does incremental
>>> completes.
>>>
>>>>>> @@ -775,6 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>>>>     	char **back_ptr;
>>>>>>     	int err, create_cow, *create_ptr;
>>>>>>     	int fd;
>>>>>> +	int data;
>>>>>>     
>>>>>>     	ubd_dev->openflags = ubd_dev->boot_openflags;
>>>>>>     	create_cow = 0;
>>>>>> @@ -829,6 +834,23 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>>>>     		if(err < 0) goto error;
>>>>>>     		ubd_dev->cow.fd = err;
>>>>>>     	}
>>>>>> +#ifdef CONFIG_BLK_DEV_UBD_DISCARD
>>>>>> +	if (ubd_dev->cow.file != NULL)
>>>>>> +		fd = ubd_dev->cow.fd;
>>>>>> +	else
>>>>>> +		fd = ubd_dev->fd;
>>>>>> +	err = os_pread_file(fd, &data, sizeof(int), 0);
>>>>>> +	if (err == sizeof(int)) {
>>>>>> +		err = os_falloc_punch(fd, 0, sizeof(int));
>>>>>> +		if (err == 0) {
>>>>> It's not clear to me how it's safe to punch a 4-byte hole in the file as
>>>>> a test?
>>>> A sizeof(int) sized hole is always implemented by simply writing zeroes.
>>>> There is no fs manipulation here.
>>>>
>>>> The test reads the contents of the file, punches a hoile and writes the
>>>> content back if the hole was punched successfully so the file is in its
>>>> orginal state.
>>>>
>>>> Life would have been much easier if fallocate did not return -EINVAL on
>>>> a 0 sized request. Unfortunately it does so any test has no choice but
>>>> to punch a hole for real and fix the damage after that.
>>>>
>>>> The location is completely safe for most UMLs UBDs. The most common
>>>> method of building images is to format a device directly with ext2/3/4 -
>>>> there is no partition table. The first sector in ext2/3/4 if memory
>>>> serves me right is unused. Other fs - less so. Partition table - totally
>>>> safe as the boot code portion is unused.
>>>>
>>>> In any case, the code restores the data to its pre-test state.
>>> It's a bit nasty... You'd probably want to do a full sector though, the
>>> fs isn't going to be issuing a trim for anything that small.
>>>
>>> What happens if the system crashes before you successfully write back
>>> the data you zeroed? Doesn't seem like a really safe way to do this.
>> There is no safe way to do this as far as I can see :)
>>
>> So if any probing is to be done it is inherently unsafe courtesy of
>> fallocate returning EINVAL on zero sized requests.
>>
>> IMHO there is no point to do probing to start off with. Issuing a trim
>> if the underlying fs does not support it does not result in anything
>> visible to the user. I tried to trim ext4 images sitting on a MSDOS
>> filesystem - there is no issues as the error is eaten somewhere inside
>> the kernel. Nothing showing up to the user and no visible damage :)
>>
>> However, Richard asked for a probe. That is how a reliable probe would
>> look like. I am not happy with it either and I think it is too risky for
>> a lot of scenarios. If we are to probe, we will have to keep some piece
>> of similar code in.
> So here's what I suggest - default to discard being available, don't
> offer WRITE_ZEROES support. Don't set the flag that tells the block
> layer that discards guarantees zeroes. Have an internal flag that is set
> when discards are enabled. If a discard fails, then clear your internal
> flag and the queue support. You must still expect discards to arrive, if
> they do and your internal discard flag is not set, you just end them,
> not in error.
>
> That'll just treat discards as hints, and you don't have to have any
> broken probe logic.
>
Sounds good. I will implement this round of comments, retest and 
resubmit the series tomorrow.

-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 15:32         ` Anton Ivanov
@ 2018-11-13 15:49           ` Jens Axboe
  2018-11-13 15:52             ` Anton Ivanov
  0 siblings, 1 reply; 15+ messages in thread
From: Jens Axboe @ 2018-11-13 15:49 UTC (permalink / raw)
  To: Anton Ivanov, linux-um; +Cc: richard, hch

On 11/13/18 8:32 AM, Anton Ivanov wrote:
> 
> On 11/13/18 3:18 PM, Jens Axboe wrote:
>> On 11/13/18 7:04 AM, Anton Ivanov wrote:
>>> On 13/11/2018 13:40, Jens Axboe wrote:
>>>> On 11/13/18 4:59 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
>>>>> index 2b1aaf7755aa..e817fd4a4c30 100644
>>>>> --- a/arch/um/drivers/Kconfig
>>>>> +++ b/arch/um/drivers/Kconfig
>>>>> @@ -1,4 +1,25 @@
>>>>>    # SPDX-License-Identifier: GPL-2.0
>>>>> +menu "UML Block Devices"
>>>>> +
>>>>> +config BLK_DEV_UBD
>>>>> +	bool "UBD Block Device"
>>>>> +	default y
>>>>> +	help
>>>>> +          User Mode Linux virtual block device driver
>>>>> +
>>>>> +config BLK_DEV_UBD_SYNC
>>>>> +	bool "Use Synchronous mode for UBD"
>>>>> +	default n
>>>>> +	help
>>>>> +          Perform all disk operations synchronously (extremely slow).
>>>>> +
>>>>> +config BLK_DEV_UBD_DISCARD
>>>>> +	bool "Enable DISCARD/TRIM support in UBD"
>>>>> +	default y
>>>>> +	help
>>>>> +          Enable discard/trim support. Requires host kernel 3.x or above and
>>>>> +	  may not be supported on all host filesystems.
>>>>> +endmenu
>>>> kconfig entries are horrible ways to manage this. A module parameter
>>>> would be better. The latter also gets rid of the various ifdefs.
>>> I completely agree with you. Unfortunately, UBD for a number of reasons
>>> cannot be a module at present. It contains "host" side and loading
>>> something from the "inside" which breaks the hypervisor boundary is a
>>> definitive no go.
>>>
>>> In order to implement the suggestion we need the concept of loadable
>>> host side modules and while that is possible the likelihood of someone
>>> doing all the work on this one at present is rather low.
>> Module parameters work for builtin as well, then you just add
>> ubd.discard=true to the kernel command line.
>>
>>>>> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
>>>>> index e78f27f5ce88..4fd4cb68f033 100644
>>>>> --- a/arch/um/drivers/ubd_kern.c
>>>>> +++ b/arch/um/drivers/ubd_kern.c
>>>>> @@ -513,9 +513,13 @@ static void ubd_handler(void)
>>>>>    		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>>>>>    			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>>>>>    
>>>>> -			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
>>>>> -				__blk_mq_end_request(io_req->req, io_req->error);
>>>>> -
>>>>> +			if ((io_req->error) || (io_req->buffer == NULL)) {
>>>>> +				blk_mq_end_request(io_req->req, io_req->error);
>>>>> +			} else {
>>>>> +				if (!blk_update_request(io_req->req, io_req->error, io_req->length)) {
>>>>> +					__blk_mq_end_request(io_req->req, io_req->error);
>>>>> +				}
>>>>> +			}
>>>>>    			kfree(io_req);
>>>> This looks odd, why is this change necessary? Also a lot of unneeded
>>>> parentheses.
>>> Loop and NBD do it that way. I can retest with the original code now
>>> that the underlying culprit is known and resubmit if that works.
>> I missed the __ part, yes it actually looks fine. blk_mq_end_request() will
>> both end the bio parts and free the request, your latter part does incremental
>> completes.
>>
>>>>> @@ -775,6 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>>>    	char **back_ptr;
>>>>>    	int err, create_cow, *create_ptr;
>>>>>    	int fd;
>>>>> +	int data;
>>>>>    
>>>>>    	ubd_dev->openflags = ubd_dev->boot_openflags;
>>>>>    	create_cow = 0;
>>>>> @@ -829,6 +834,23 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>>>    		if(err < 0) goto error;
>>>>>    		ubd_dev->cow.fd = err;
>>>>>    	}
>>>>> +#ifdef CONFIG_BLK_DEV_UBD_DISCARD
>>>>> +	if (ubd_dev->cow.file != NULL)
>>>>> +		fd = ubd_dev->cow.fd;
>>>>> +	else
>>>>> +		fd = ubd_dev->fd;
>>>>> +	err = os_pread_file(fd, &data, sizeof(int), 0);
>>>>> +	if (err == sizeof(int)) {
>>>>> +		err = os_falloc_punch(fd, 0, sizeof(int));
>>>>> +		if (err == 0) {
>>>> It's not clear to me how it's safe to punch a 4-byte hole in the file as
>>>> a test?
>>> A sizeof(int) sized hole is always implemented by simply writing zeroes.
>>> There is no fs manipulation here.
>>>
>>> The test reads the contents of the file, punches a hoile and writes the
>>> content back if the hole was punched successfully so the file is in its
>>> orginal state.
>>>
>>> Life would have been much easier if fallocate did not return -EINVAL on
>>> a 0 sized request. Unfortunately it does so any test has no choice but
>>> to punch a hole for real and fix the damage after that.
>>>
>>> The location is completely safe for most UMLs UBDs. The most common
>>> method of building images is to format a device directly with ext2/3/4 -
>>> there is no partition table. The first sector in ext2/3/4 if memory
>>> serves me right is unused. Other fs - less so. Partition table - totally
>>> safe as the boot code portion is unused.
>>>
>>> In any case, the code restores the data to its pre-test state.
>> It's a bit nasty... You'd probably want to do a full sector though, the
>> fs isn't going to be issuing a trim for anything that small.
>>
>> What happens if the system crashes before you successfully write back
>> the data you zeroed? Doesn't seem like a really safe way to do this.
> 
> There is no safe way to do this as far as I can see :)
> 
> So if any probing is to be done it is inherently unsafe courtesy of 
> fallocate returning EINVAL on zero sized requests.
> 
> IMHO there is no point to do probing to start off with. Issuing a trim 
> if the underlying fs does not support it does not result in anything 
> visible to the user. I tried to trim ext4 images sitting on a MSDOS 
> filesystem - there is no issues as the error is eaten somewhere inside 
> the kernel. Nothing showing up to the user and no visible damage :)
> 
> However, Richard asked for a probe. That is how a reliable probe would 
> look like. I am not happy with it either and I think it is too risky for 
> a lot of scenarios. If we are to probe, we will have to keep some piece 
> of similar code in.

So here's what I suggest - default to discard being available, don't
offer WRITE_ZEROES support. Don't set the flag that tells the block
layer that discards guarantees zeroes. Have an internal flag that is set
when discards are enabled. If a discard fails, then clear your internal
flag and the queue support. You must still expect discards to arrive, if
they do and your internal discard flag is not set, you just end them,
not in error.

That'll just treat discards as hints, and you don't have to have any
broken probe logic.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 15:18       ` Jens Axboe
@ 2018-11-13 15:32         ` Anton Ivanov
  2018-11-13 15:49           ` Jens Axboe
  0 siblings, 1 reply; 15+ messages in thread
From: Anton Ivanov @ 2018-11-13 15:32 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch


On 11/13/18 3:18 PM, Jens Axboe wrote:
> On 11/13/18 7:04 AM, Anton Ivanov wrote:
>> On 13/11/2018 13:40, Jens Axboe wrote:
>>> On 11/13/18 4:59 AM, anton.ivanov@cambridgegreys.com wrote:
>>>> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
>>>> index 2b1aaf7755aa..e817fd4a4c30 100644
>>>> --- a/arch/um/drivers/Kconfig
>>>> +++ b/arch/um/drivers/Kconfig
>>>> @@ -1,4 +1,25 @@
>>>>    # SPDX-License-Identifier: GPL-2.0
>>>> +menu "UML Block Devices"
>>>> +
>>>> +config BLK_DEV_UBD
>>>> +	bool "UBD Block Device"
>>>> +	default y
>>>> +	help
>>>> +          User Mode Linux virtual block device driver
>>>> +
>>>> +config BLK_DEV_UBD_SYNC
>>>> +	bool "Use Synchronous mode for UBD"
>>>> +	default n
>>>> +	help
>>>> +          Perform all disk operations synchronously (extremely slow).
>>>> +
>>>> +config BLK_DEV_UBD_DISCARD
>>>> +	bool "Enable DISCARD/TRIM support in UBD"
>>>> +	default y
>>>> +	help
>>>> +          Enable discard/trim support. Requires host kernel 3.x or above and
>>>> +	  may not be supported on all host filesystems.
>>>> +endmenu
>>> kconfig entries are horrible ways to manage this. A module parameter
>>> would be better. The latter also gets rid of the various ifdefs.
>> I completely agree with you. Unfortunately, UBD for a number of reasons
>> cannot be a module at present. It contains "host" side and loading
>> something from the "inside" which breaks the hypervisor boundary is a
>> definitive no go.
>>
>> In order to implement the suggestion we need the concept of loadable
>> host side modules and while that is possible the likelihood of someone
>> doing all the work on this one at present is rather low.
> Module parameters work for builtin as well, then you just add
> ubd.discard=true to the kernel command line.
>
>>>> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
>>>> index e78f27f5ce88..4fd4cb68f033 100644
>>>> --- a/arch/um/drivers/ubd_kern.c
>>>> +++ b/arch/um/drivers/ubd_kern.c
>>>> @@ -513,9 +513,13 @@ static void ubd_handler(void)
>>>>    		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>>>>    			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>>>>    
>>>> -			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
>>>> -				__blk_mq_end_request(io_req->req, io_req->error);
>>>> -
>>>> +			if ((io_req->error) || (io_req->buffer == NULL)) {
>>>> +				blk_mq_end_request(io_req->req, io_req->error);
>>>> +			} else {
>>>> +				if (!blk_update_request(io_req->req, io_req->error, io_req->length)) {
>>>> +					__blk_mq_end_request(io_req->req, io_req->error);
>>>> +				}
>>>> +			}
>>>>    			kfree(io_req);
>>> This looks odd, why is this change necessary? Also a lot of unneeded
>>> parentheses.
>> Loop and NBD do it that way. I can retest with the original code now
>> that the underlying culprit is known and resubmit if that works.
> I missed the __ part, yes it actually looks fine. blk_mq_end_request() will
> both end the bio parts and free the request, your latter part does incremental
> completes.
>
>>>> @@ -775,6 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>>    	char **back_ptr;
>>>>    	int err, create_cow, *create_ptr;
>>>>    	int fd;
>>>> +	int data;
>>>>    
>>>>    	ubd_dev->openflags = ubd_dev->boot_openflags;
>>>>    	create_cow = 0;
>>>> @@ -829,6 +834,23 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>>    		if(err < 0) goto error;
>>>>    		ubd_dev->cow.fd = err;
>>>>    	}
>>>> +#ifdef CONFIG_BLK_DEV_UBD_DISCARD
>>>> +	if (ubd_dev->cow.file != NULL)
>>>> +		fd = ubd_dev->cow.fd;
>>>> +	else
>>>> +		fd = ubd_dev->fd;
>>>> +	err = os_pread_file(fd, &data, sizeof(int), 0);
>>>> +	if (err == sizeof(int)) {
>>>> +		err = os_falloc_punch(fd, 0, sizeof(int));
>>>> +		if (err == 0) {
>>> It's not clear to me how it's safe to punch a 4-byte hole in the file as
>>> a test?
>> A sizeof(int) sized hole is always implemented by simply writing zeroes.
>> There is no fs manipulation here.
>>
>> The test reads the contents of the file, punches a hoile and writes the
>> content back if the hole was punched successfully so the file is in its
>> orginal state.
>>
>> Life would have been much easier if fallocate did not return -EINVAL on
>> a 0 sized request. Unfortunately it does so any test has no choice but
>> to punch a hole for real and fix the damage after that.
>>
>> The location is completely safe for most UMLs UBDs. The most common
>> method of building images is to format a device directly with ext2/3/4 -
>> there is no partition table. The first sector in ext2/3/4 if memory
>> serves me right is unused. Other fs - less so. Partition table - totally
>> safe as the boot code portion is unused.
>>
>> In any case, the code restores the data to its pre-test state.
> It's a bit nasty... You'd probably want to do a full sector though, the
> fs isn't going to be issuing a trim for anything that small.
>
> What happens if the system crashes before you successfully write back
> the data you zeroed? Doesn't seem like a really safe way to do this.

There is no safe way to do this as far as I can see :)

So if any probing is to be done it is inherently unsafe courtesy of 
fallocate returning EINVAL on zero sized requests.

IMHO there is no point to do probing to start off with. Issuing a trim 
if the underlying fs does not support it does not result in anything 
visible to the user. I tried to trim ext4 images sitting on a MSDOS 
filesystem - there is no issues as the error is eaten somewhere inside 
the kernel. Nothing showing up to the user and no visible damage :)

However, Richard asked for a probe. That is how a reliable probe would 
look like. I am not happy with it either and I think it is too risky for 
a lot of scenarios. If we are to probe, we will have to keep some piece 
of similar code in.

-- 
Anton R. Ivanov

Cambridge Greys Limited, England and Wales company No 10273661
http://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 14:04     ` Anton Ivanov
@ 2018-11-13 15:18       ` Jens Axboe
  2018-11-13 15:32         ` Anton Ivanov
  0 siblings, 1 reply; 15+ messages in thread
From: Jens Axboe @ 2018-11-13 15:18 UTC (permalink / raw)
  To: Anton Ivanov, linux-um; +Cc: richard, hch

On 11/13/18 7:04 AM, Anton Ivanov wrote:
> On 13/11/2018 13:40, Jens Axboe wrote:
>> On 11/13/18 4:59 AM, anton.ivanov@cambridgegreys.com wrote:
>>> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
>>> index 2b1aaf7755aa..e817fd4a4c30 100644
>>> --- a/arch/um/drivers/Kconfig
>>> +++ b/arch/um/drivers/Kconfig
>>> @@ -1,4 +1,25 @@
>>>   # SPDX-License-Identifier: GPL-2.0
>>> +menu "UML Block Devices"
>>> +
>>> +config BLK_DEV_UBD
>>> +	bool "UBD Block Device"
>>> +	default y
>>> +	help
>>> +          User Mode Linux virtual block device driver
>>> +
>>> +config BLK_DEV_UBD_SYNC
>>> +	bool "Use Synchronous mode for UBD"
>>> +	default n
>>> +	help
>>> +          Perform all disk operations synchronously (extremely slow).
>>> +
>>> +config BLK_DEV_UBD_DISCARD
>>> +	bool "Enable DISCARD/TRIM support in UBD"
>>> +	default y
>>> +	help
>>> +          Enable discard/trim support. Requires host kernel 3.x or above and
>>> +	  may not be supported on all host filesystems.
>>> +endmenu
>> kconfig entries are horrible ways to manage this. A module parameter
>> would be better. The latter also gets rid of the various ifdefs.
> 
> I completely agree with you. Unfortunately, UBD for a number of reasons 
> cannot be a module at present. It contains "host" side and loading 
> something from the "inside" which breaks the hypervisor boundary is a 
> definitive no go.
> 
> In order to implement the suggestion we need the concept of loadable 
> host side modules and while that is possible the likelihood of someone 
> doing all the work on this one at present is rather low.

Module parameters work for builtin as well, then you just add
ubd.discard=true to the kernel command line.

>>> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
>>> index e78f27f5ce88..4fd4cb68f033 100644
>>> --- a/arch/um/drivers/ubd_kern.c
>>> +++ b/arch/um/drivers/ubd_kern.c
>>> @@ -513,9 +513,13 @@ static void ubd_handler(void)
>>>   		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>>>   			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>>>   
>>> -			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
>>> -				__blk_mq_end_request(io_req->req, io_req->error);
>>> -
>>> +			if ((io_req->error) || (io_req->buffer == NULL)) {
>>> +				blk_mq_end_request(io_req->req, io_req->error);
>>> +			} else {
>>> +				if (!blk_update_request(io_req->req, io_req->error, io_req->length)) {
>>> +					__blk_mq_end_request(io_req->req, io_req->error);
>>> +				}
>>> +			}
>>>   			kfree(io_req);
>> This looks odd, why is this change necessary? Also a lot of unneeded
>> parentheses.
> 
> Loop and NBD do it that way. I can retest with the original code now 
> that the underlying culprit is known and resubmit if that works.

I missed the __ part, yes it actually looks fine. blk_mq_end_request() will
both end the bio parts and free the request, your latter part does incremental
completes.

>>> @@ -775,6 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>   	char **back_ptr;
>>>   	int err, create_cow, *create_ptr;
>>>   	int fd;
>>> +	int data;
>>>   
>>>   	ubd_dev->openflags = ubd_dev->boot_openflags;
>>>   	create_cow = 0;
>>> @@ -829,6 +834,23 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>>   		if(err < 0) goto error;
>>>   		ubd_dev->cow.fd = err;
>>>   	}
>>> +#ifdef CONFIG_BLK_DEV_UBD_DISCARD
>>> +	if (ubd_dev->cow.file != NULL)
>>> +		fd = ubd_dev->cow.fd;
>>> +	else
>>> +		fd = ubd_dev->fd;
>>> +	err = os_pread_file(fd, &data, sizeof(int), 0);
>>> +	if (err == sizeof(int)) {
>>> +		err = os_falloc_punch(fd, 0, sizeof(int));
>>> +		if (err == 0) {
>> It's not clear to me how it's safe to punch a 4-byte hole in the file as
>> a test?
> 
> A sizeof(int) sized hole is always implemented by simply writing zeroes. 
> There is no fs manipulation here.
> 
> The test reads the contents of the file, punches a hoile and writes the 
> content back if the hole was punched successfully so the file is in its 
> orginal state.
> 
> Life would have been much easier if fallocate did not return -EINVAL on 
> a 0 sized request. Unfortunately it does so any test has no choice but 
> to punch a hole for real and fix the damage after that.
> 
> The location is completely safe for most UMLs UBDs. The most common 
> method of building images is to format a device directly with ext2/3/4 - 
> there is no partition table. The first sector in ext2/3/4 if memory 
> serves me right is unused. Other fs - less so. Partition table - totally 
> safe as the boot code portion is unused.
> 
> In any case, the code restores the data to its pre-test state.

It's a bit nasty... You'd probably want to do a full sector though, the
fs isn't going to be issuing a trim for anything that small.

What happens if the system crashes before you successfully write back
the data you zeroed? Doesn't seem like a really safe way to do this.

-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 13:40   ` Jens Axboe
@ 2018-11-13 14:04     ` Anton Ivanov
  2018-11-13 15:18       ` Jens Axboe
  0 siblings, 1 reply; 15+ messages in thread
From: Anton Ivanov @ 2018-11-13 14:04 UTC (permalink / raw)
  To: Jens Axboe, linux-um; +Cc: richard, hch

On 13/11/2018 13:40, Jens Axboe wrote:
> On 11/13/18 4:59 AM, anton.ivanov@cambridgegreys.com wrote:
>> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
>> index 2b1aaf7755aa..e817fd4a4c30 100644
>> --- a/arch/um/drivers/Kconfig
>> +++ b/arch/um/drivers/Kconfig
>> @@ -1,4 +1,25 @@
>>   # SPDX-License-Identifier: GPL-2.0
>> +menu "UML Block Devices"
>> +
>> +config BLK_DEV_UBD
>> +	bool "UBD Block Device"
>> +	default y
>> +	help
>> +          User Mode Linux virtual block device driver
>> +
>> +config BLK_DEV_UBD_SYNC
>> +	bool "Use Synchronous mode for UBD"
>> +	default n
>> +	help
>> +          Perform all disk operations synchronously (extremely slow).
>> +
>> +config BLK_DEV_UBD_DISCARD
>> +	bool "Enable DISCARD/TRIM support in UBD"
>> +	default y
>> +	help
>> +          Enable discard/trim support. Requires host kernel 3.x or above and
>> +	  may not be supported on all host filesystems.
>> +endmenu
> kconfig entries are horrible ways to manage this. A module parameter
> would be better. The latter also gets rid of the various ifdefs.

I completely agree with you. Unfortunately, UBD for a number of reasons 
cannot be a module at present. It contains "host" side and loading 
something from the "inside" which breaks the hypervisor boundary is a 
definitive no go.

In order to implement the suggestion we need the concept of loadable 
host side modules and while that is possible the likelihood of someone 
doing all the work on this one at present is rather low.

>
>> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
>> index e78f27f5ce88..4fd4cb68f033 100644
>> --- a/arch/um/drivers/ubd_kern.c
>> +++ b/arch/um/drivers/ubd_kern.c
>> @@ -513,9 +513,13 @@ static void ubd_handler(void)
>>   		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>>   			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>>   
>> -			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
>> -				__blk_mq_end_request(io_req->req, io_req->error);
>> -
>> +			if ((io_req->error) || (io_req->buffer == NULL)) {
>> +				blk_mq_end_request(io_req->req, io_req->error);
>> +			} else {
>> +				if (!blk_update_request(io_req->req, io_req->error, io_req->length)) {
>> +					__blk_mq_end_request(io_req->req, io_req->error);
>> +				}
>> +			}
>>   			kfree(io_req);
> This looks odd, why is this change necessary? Also a lot of unneeded
> parentheses.

Loop and NBD do it that way. I can retest with the original code now 
that the underlying culprit is known and resubmit if that works.

>
>>   		}
>>   	}
>> @@ -775,6 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>   	char **back_ptr;
>>   	int err, create_cow, *create_ptr;
>>   	int fd;
>> +	int data;
>>   
>>   	ubd_dev->openflags = ubd_dev->boot_openflags;
>>   	create_cow = 0;
>> @@ -829,6 +834,23 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>>   		if(err < 0) goto error;
>>   		ubd_dev->cow.fd = err;
>>   	}
>> +#ifdef CONFIG_BLK_DEV_UBD_DISCARD
>> +	if (ubd_dev->cow.file != NULL)
>> +		fd = ubd_dev->cow.fd;
>> +	else
>> +		fd = ubd_dev->fd;
>> +	err = os_pread_file(fd, &data, sizeof(int), 0);
>> +	if (err == sizeof(int)) {
>> +		err = os_falloc_punch(fd, 0, sizeof(int));
>> +		if (err == 0) {
> It's not clear to me how it's safe to punch a 4-byte hole in the file as
> a test?

A sizeof(int) sized hole is always implemented by simply writing zeroes. 
There is no fs manipulation here.

The test reads the contents of the file, punches a hoile and writes the 
content back if the hole was punched successfully so the file is in its 
orginal state.

Life would have been much easier if fallocate did not return -EINVAL on 
a 0 sized request. Unfortunately it does so any test has no choice but 
to punch a hole for real and fix the damage after that.

The location is completely safe for most UMLs UBDs. The most common 
method of building images is to format a device directly with ext2/3/4 - 
there is no partition table. The first sector in ext2/3/4 if memory 
serves me right is unused. Other fs - less so. Partition table - totally 
safe as the boot code portion is unused.

In any case, the code restores the data to its pre-test state.

-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 11:59 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
@ 2018-11-13 13:40   ` Jens Axboe
  2018-11-13 14:04     ` Anton Ivanov
  2018-11-14 15:30   ` Christoph Hellwig
  1 sibling, 1 reply; 15+ messages in thread
From: Jens Axboe @ 2018-11-13 13:40 UTC (permalink / raw)
  To: anton.ivanov, linux-um; +Cc: richard, hch

On 11/13/18 4:59 AM, anton.ivanov@cambridgegreys.com wrote:
> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
> index 2b1aaf7755aa..e817fd4a4c30 100644
> --- a/arch/um/drivers/Kconfig
> +++ b/arch/um/drivers/Kconfig
> @@ -1,4 +1,25 @@
>  # SPDX-License-Identifier: GPL-2.0
> +menu "UML Block Devices"
> +
> +config BLK_DEV_UBD
> +	bool "UBD Block Device"
> +	default y
> +	help
> +          User Mode Linux virtual block device driver
> +
> +config BLK_DEV_UBD_SYNC
> +	bool "Use Synchronous mode for UBD"
> +	default n
> +	help
> +          Perform all disk operations synchronously (extremely slow).
> +
> +config BLK_DEV_UBD_DISCARD
> +	bool "Enable DISCARD/TRIM support in UBD"
> +	default y
> +	help
> +          Enable discard/trim support. Requires host kernel 3.x or above and
> +	  may not be supported on all host filesystems.
> +endmenu

kconfig entries are horrible ways to manage this. A module parameter
would be better. The latter also gets rid of the various ifdefs.

> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
> index e78f27f5ce88..4fd4cb68f033 100644
> --- a/arch/um/drivers/ubd_kern.c
> +++ b/arch/um/drivers/ubd_kern.c
> @@ -513,9 +513,13 @@ static void ubd_handler(void)
>  		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>  			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>  
> -			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
> -				__blk_mq_end_request(io_req->req, io_req->error);
> -
> +			if ((io_req->error) || (io_req->buffer == NULL)) {
> +				blk_mq_end_request(io_req->req, io_req->error);
> +			} else {
> +				if (!blk_update_request(io_req->req, io_req->error, io_req->length)) {
> +					__blk_mq_end_request(io_req->req, io_req->error);
> +				}
> +			}
>  			kfree(io_req);

This looks odd, why is this change necessary? Also a lot of unneeded
parentheses.

>  		}
>  	}
> @@ -775,6 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>  	char **back_ptr;
>  	int err, create_cow, *create_ptr;
>  	int fd;
> +	int data;
>  
>  	ubd_dev->openflags = ubd_dev->boot_openflags;
>  	create_cow = 0;
> @@ -829,6 +834,23 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>  		if(err < 0) goto error;
>  		ubd_dev->cow.fd = err;
>  	}
> +#ifdef CONFIG_BLK_DEV_UBD_DISCARD
> +	if (ubd_dev->cow.file != NULL)
> +		fd = ubd_dev->cow.fd;
> +	else
> +		fd = ubd_dev->fd;
> +	err = os_pread_file(fd, &data, sizeof(int), 0);
> +	if (err == sizeof(int)) {
> +		err = os_falloc_punch(fd, 0, sizeof(int));
> +		if (err == 0) {

It's not clear to me how it's safe to punch a 4-byte hole in the file as
a test?


-- 
Jens Axboe


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 4/4] um: Add support for DISCARD in the UBD Driver
  2018-11-13 11:59 Revised and fixed patchset anton.ivanov
@ 2018-11-13 11:59 ` anton.ivanov
  2018-11-13 13:40   ` Jens Axboe
  2018-11-14 15:30   ` Christoph Hellwig
  0 siblings, 2 replies; 15+ messages in thread
From: anton.ivanov @ 2018-11-13 11:59 UTC (permalink / raw)
  To: linux-um; +Cc: axboe, richard, hch, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Support for DISCARD and WRITE_ZEROES in the ubd driver using
fallocate. DISCARD capable devices are autodetected at open
time.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/Kconfig     | 21 +++++++++++++++
 arch/um/drivers/ubd_kern.c  | 66 ++++++++++++++++++++++++++++++++++++++-------
 arch/um/include/shared/os.h |  1 +
 arch/um/os-Linux/file.c     | 10 +++++++
 4 files changed, 89 insertions(+), 9 deletions(-)

diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
index 2b1aaf7755aa..e817fd4a4c30 100644
--- a/arch/um/drivers/Kconfig
+++ b/arch/um/drivers/Kconfig
@@ -1,4 +1,25 @@
 # SPDX-License-Identifier: GPL-2.0
+menu "UML Block Devices"
+
+config BLK_DEV_UBD
+	bool "UBD Block Device"
+	default y
+	help
+          User Mode Linux virtual block device driver
+
+config BLK_DEV_UBD_SYNC
+	bool "Use Synchronous mode for UBD"
+	default n
+	help
+          Perform all disk operations synchronously (extremely slow).
+
+config BLK_DEV_UBD_DISCARD
+	bool "Enable DISCARD/TRIM support in UBD"
+	default y
+	help
+          Enable discard/trim support. Requires host kernel 3.x or above and
+	  may not be supported on all host filesystems.
+endmenu
 
 menu "UML Character Devices"
 
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index e78f27f5ce88..4fd4cb68f033 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -513,9 +513,13 @@ static void ubd_handler(void)
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 			struct io_thread_req *io_req = (*irq_req_buffer)[count];
 
-			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
-				__blk_mq_end_request(io_req->req, io_req->error);
-
+			if ((io_req->error) || (io_req->buffer == NULL)) {
+				blk_mq_end_request(io_req->req, io_req->error);
+			} else {
+				if (!blk_update_request(io_req->req, io_req->error, io_req->length)) {
+					__blk_mq_end_request(io_req->req, io_req->error);
+				}
+			}
 			kfree(io_req);
 		}
 	}
@@ -775,6 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 	char **back_ptr;
 	int err, create_cow, *create_ptr;
 	int fd;
+	int data;
 
 	ubd_dev->openflags = ubd_dev->boot_openflags;
 	create_cow = 0;
@@ -829,6 +834,23 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 		if(err < 0) goto error;
 		ubd_dev->cow.fd = err;
 	}
+#ifdef CONFIG_BLK_DEV_UBD_DISCARD
+	if (ubd_dev->cow.file != NULL)
+		fd = ubd_dev->cow.fd;
+	else
+		fd = ubd_dev->fd;
+	err = os_pread_file(fd, &data, sizeof(int), 0);
+	if (err == sizeof(int)) {
+		err = os_falloc_punch(fd, 0, sizeof(int));
+		if (err == 0) {
+			ubd_dev->queue->limits.discard_granularity = UBD_SECTOR_SIZE;
+			ubd_dev->queue->limits.discard_alignment = UBD_SECTOR_SIZE;
+			blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
+			blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
+			os_pwrite_file(fd, &data, sizeof(int), 0);
+		}
+	}
+#endif
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 	return 0;
  error:
@@ -1368,6 +1390,12 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 			}
 		}
 		break;
+#ifdef CONFIG_BLK_DEV_UBD_DISCARD
+	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
+		ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
+		break;
+#endif
 	default:
 		WARN_ON_ONCE(1);
 		spin_unlock_irq(&ubd_dev->lock);
@@ -1447,7 +1475,7 @@ static int update_bitmap(struct io_thread_req *req)
 
 	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
 			  sizeof(req->bitmap_words), req->cow_offset);
-	if(n != sizeof(req->bitmap_words))
+	if (n != sizeof(req->bitmap_words))
 		return map_error(-n);
 
 	return map_error(0);
@@ -1455,11 +1483,13 @@ static int update_bitmap(struct io_thread_req *req)
 
 static void do_io(struct io_thread_req *req)
 {
-	char *buf;
+	char *buf = NULL;
 	unsigned long len;
 	int n, nsectors, start, end, bit;
 	__u64 off;
 
+	/* FLUSH is really a special case, we cannot "case" it with others */
+
 	if (req_op(req->req) == REQ_OP_FLUSH) {
 		/* fds[0] is always either the rw image or our cow file */
 		req->error = map_error(-os_sync_file(req->fds[0]));
@@ -1479,26 +1509,44 @@ static void do_io(struct io_thread_req *req)
 		off = req->offset + req->offsets[bit] +
 			start * req->sectorsize;
 		len = (end - start) * req->sectorsize;
-		buf = &req->buffer[start * req->sectorsize];
+		if (req->buffer != NULL)
+			buf = &req->buffer[start * req->sectorsize];
 
-		if (req_op(req->req) == REQ_OP_READ) {
+		switch (req_op(req->req)) {
+		case REQ_OP_READ:
 			n = 0;
 			do {
 				buf = &buf[n];
 				len -= n;
 				n = os_pread_file(req->fds[bit], buf, len, off);
-				if(n < 0){
+				if (n < 0) {
 					req->error = map_error(-n);
 					return;
 				}
 			} while((n < len) && (n != 0));
 			if (n < len) memset(&buf[n], 0, len - n);
-		} else {
+			break;
+		case REQ_OP_WRITE:
 			n = os_pwrite_file(req->fds[bit], buf, len, off);
 			if(n != len){
 				req->error = map_error(-n);
 				return;
 			}
+			break;
+#ifdef CONFIG_BLK_DEV_UBD_DISCARD
+		case REQ_OP_DISCARD:
+		case REQ_OP_WRITE_ZEROES:
+			n = os_falloc_punch(req->fds[bit], off, len);
+			if (n) {
+				req->error = map_error(-n);
+				return;
+			}
+			break;
+#endif
+		default:
+			WARN_ON_ONCE(1);
+			req->error = BLK_STS_NOTSUPP;
+			return;
 		}
 
 		start = end;
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 048ae37eb5aa..ebf23012a59b 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -175,6 +175,7 @@ extern int os_fchange_dir(int fd);
 extern unsigned os_major(unsigned long long dev);
 extern unsigned os_minor(unsigned long long dev);
 extern unsigned long long os_makedev(unsigned major, unsigned minor);
+extern int os_falloc_punch(int fd, unsigned long long offset, int count);
 
 /* start_up.c */
 extern void os_early_checks(void);
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index c0197097c86e..f25b110d4e70 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -610,3 +610,13 @@ unsigned long long os_makedev(unsigned major, unsigned minor)
 {
 	return makedev(major, minor);
 }
+
+int os_falloc_punch(int fd, unsigned long long offset, int len)
+{
+	int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
+
-- 
2.11.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2018-11-14 18:31 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-14 17:09 [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
2018-11-14 17:09 ` [PATCH 2/4] um: Clean-up command processing in " anton.ivanov
2018-11-14 17:09 ` [PATCH 3/4] um: Remove unsafe printks from the io thread anton.ivanov
2018-11-14 17:09 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
2018-11-14 18:30   ` Anton Ivanov
  -- strict thread matches above, loose matches on Subject: below --
2018-11-14  8:10 [PATCH 1/4] um: Switch to block-mq constants in the UML UBD driver anton.ivanov
2018-11-14  8:10 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
2018-11-13 11:59 Revised and fixed patchset anton.ivanov
2018-11-13 11:59 ` [PATCH 4/4] um: Add support for DISCARD in the UBD Driver anton.ivanov
2018-11-13 13:40   ` Jens Axboe
2018-11-13 14:04     ` Anton Ivanov
2018-11-13 15:18       ` Jens Axboe
2018-11-13 15:32         ` Anton Ivanov
2018-11-13 15:49           ` Jens Axboe
2018-11-13 15:52             ` Anton Ivanov
2018-11-14 15:30   ` Christoph Hellwig
2018-11-14 15:35     ` Anton Ivanov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.