All of lore.kernel.org
 help / color / mirror / Atom feed
* [uml-devel] [PATCH 1/2] Update UBD to use pread/pwrite family of functions
@ 2015-12-21 18:54 Anton Ivanov
  2015-12-21 18:54 ` [uml-devel] [PATCH 2/2] Bulk IO Transaction support part 1 Anton Ivanov
  2016-01-10 15:57 ` [uml-devel] [PATCH 1/2] Update UBD to use pread/pwrite family of functions Richard Weinberger
  0 siblings, 2 replies; 9+ messages in thread
From: Anton Ivanov @ 2015-12-21 18:54 UTC (permalink / raw)
  To: user-mode-linux-devel; +Cc: richard, Anton Ivanov

This decreases the number of syscalls per read/write by half.

Signed-off-by: Anton Ivanov <aivanov@brocade.com>
---
 arch/um/drivers/ubd_kern.c  | 27 +++++----------------------
 arch/um/include/shared/os.h |  2 ++
 arch/um/os-Linux/file.c     | 19 +++++++++++++++++++
 3 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index e8ab93c..39ba207 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -535,11 +535,7 @@ static int read_cow_bitmap(int fd, void *buf, int offset, int len)
 {
 	int err;
 
-	err = os_seek_file(fd, offset);
-	if (err < 0)
-		return err;
-
-	err = os_read_file(fd, buf, len);
+	err = os_pread_file(fd, buf, len, offset);
 	if (err < 0)
 		return err;
 
@@ -1377,14 +1373,8 @@ static int update_bitmap(struct io_thread_req *req)
 	if(req->cow_offset == -1)
 		return 0;
 
-	n = os_seek_file(req->fds[1], req->cow_offset);
-	if(n < 0){
-		printk("do_io - bitmap lseek failed : err = %d\n", -n);
-		return 1;
-	}
-
-	n = os_write_file(req->fds[1], &req->bitmap_words,
-			  sizeof(req->bitmap_words));
+	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
+			  sizeof(req->bitmap_words), req->cow_offset);
 	if(n != sizeof(req->bitmap_words)){
 		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
 		       req->fds[1]);
@@ -1399,7 +1389,6 @@ static void do_io(struct io_thread_req *req)
 	char *buf;
 	unsigned long len;
 	int n, nsectors, start, end, bit;
-	int err;
 	__u64 off;
 
 	if (req->op == UBD_FLUSH) {
@@ -1428,18 +1417,12 @@ static void do_io(struct io_thread_req *req)
 		len = (end - start) * req->sectorsize;
 		buf = &req->buffer[start * req->sectorsize];
 
-		err = os_seek_file(req->fds[bit], off);
-		if(err < 0){
-			printk("do_io - lseek failed : err = %d\n", -err);
-			req->error = 1;
-			return;
-		}
 		if(req->op == UBD_READ){
 			n = 0;
 			do {
 				buf = &buf[n];
 				len -= n;
-				n = os_read_file(req->fds[bit], buf, len);
+				n = os_pread_file(req->fds[bit], buf, len, off);
 				if (n < 0) {
 					printk("do_io - read failed, err = %d "
 					       "fd = %d\n", -n, req->fds[bit]);
@@ -1449,7 +1432,7 @@ static void do_io(struct io_thread_req *req)
 			} while((n < len) && (n != 0));
 			if (n < len) memset(&buf[n], 0, len - n);
 		} else {
-			n = os_write_file(req->fds[bit], buf, len);
+			n = os_pwrite_file(req->fds[bit], buf, len, off);
 			if(n != len){
 				printk("do_io - write failed err = %d "
 				       "fd = %d\n", -n, req->fds[bit]);
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 868e6c3..7a04ddd 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -146,6 +146,8 @@ extern int os_read_file(int fd, void *buf, int len);
 extern int os_write_file(int fd, const void *buf, int count);
 extern int os_sync_file(int fd);
 extern int os_file_size(const char *file, unsigned long long *size_out);
+extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset);
+extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset);
 extern int os_file_modtime(const char *file, unsigned long *modtime);
 extern int os_pipe(int *fd, int stream, int close_on_exec);
 extern int os_set_fd_async(int fd);
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 26e0164..2db18cb 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -264,6 +264,15 @@ int os_read_file(int fd, void *buf, int len)
 	return n;
 }
 
+int os_pread_file(int fd, void *buf, int len, unsigned long long offset)
+{
+	int n = pread(fd, buf, len, offset);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
+
 int os_write_file(int fd, const void *buf, int len)
 {
 	int n = write(fd, (void *) buf, len);
@@ -282,6 +291,16 @@ int os_sync_file(int fd)
 	return n;
 }
 
+int os_pwrite_file(int fd, const void *buf, int len, unsigned long long offset)
+{
+	int n = pwrite(fd, (void *) buf, len, offset);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
+
+
 int os_file_size(const char *file, unsigned long long *size_out)
 {
 	struct uml_stat buf;
-- 
2.1.4


------------------------------------------------------------------------------
_______________________________________________
User-mode-linux-devel mailing list
User-mode-linux-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [uml-devel] [PATCH 2/2] Bulk IO Transaction support part 1
  2015-12-21 18:54 [uml-devel] [PATCH 1/2] Update UBD to use pread/pwrite family of functions Anton Ivanov
@ 2015-12-21 18:54 ` Anton Ivanov
  2015-12-21 19:04   ` Anton Ivanov
  2016-01-12 21:25   ` James McMechan
  2016-01-10 15:57 ` [uml-devel] [PATCH 1/2] Update UBD to use pread/pwrite family of functions Richard Weinberger
  1 sibling, 2 replies; 9+ messages in thread
From: Anton Ivanov @ 2015-12-21 18:54 UTC (permalink / raw)
  To: user-mode-linux-devel; +Cc: richard, Anton Ivanov

This patch adds support for merging notifications on the ubd
notification file descriptor. Multiple transactions are processed
at a time resulting in 10-15% virtual disk speed improvement.

The mechanics are rather primitive - no ring buffers, primitive
guaranteed flush and guaranteed to-record-size read.

Signed-off-by: Anton Ivanov <aivanov@brocade.com>
---
 arch/um/drivers/ubd.h       |   2 +
 arch/um/drivers/ubd_kern.c  | 156 ++++++++++++++++++++++++++++++++++++--------
 arch/um/drivers/ubd_user.c  |  19 +++++-
 arch/um/include/shared/os.h |   1 +
 arch/um/os-Linux/file.c     |  12 ++++
 5 files changed, 161 insertions(+), 29 deletions(-)

diff --git a/arch/um/drivers/ubd.h b/arch/um/drivers/ubd.h
index 3b48cd2..2c5e6fd 100644
--- a/arch/um/drivers/ubd.h
+++ b/arch/um/drivers/ubd.h
@@ -10,6 +10,8 @@
 extern int start_io_thread(unsigned long sp, int *fds_out);
 extern int io_thread(void *arg);
 extern int kernel_fd;
+extern void setup_ubd_pollfd(void * fds, int fd);
+extern int size_of_pollfd(void);
 
 #endif
 
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 39ba207..9f30c50 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -58,6 +58,12 @@ struct io_thread_req {
 	int error;
 };
 
+static void * kernel_pfd = NULL;
+
+struct io_thread_req **kernel_reqs;
+
+struct io_thread_req **helper_reqs;
+
 static inline int ubd_test_bit(__u64 bit, unsigned char *data)
 {
 	__u64 n;
@@ -442,6 +448,32 @@ static void do_ubd_request(struct request_queue * q);
 static int thread_fd = -1;
 static LIST_HEAD(restart);
 
+
+static int do_safe_read(int fd, void * buffer, int max_size, int record_size){
+	unsigned char * buf;
+	int n, size;
+
+	size = 0;
+	buf = (unsigned char *) buffer;
+
+	return os_read_file(fd, buf + size, max_size - size);
+
+	do {
+		n = os_read_file(fd, buf, max_size - size);
+		if ((size == 0) && (n == -EAGAIN))
+			return n;
+		if (n > 0)
+			size = size + n;
+		if ((n < 0) && (n != -EAGAIN))
+			return n;
+		if (size == max_size)
+			break;
+	} while ((size % record_size) != 0);
+
+	return size;
+}
+
+
 /* XXX - move this inside ubd_intr. */
 /* Called without dev->lock held, and only in interrupt context. */
 static void ubd_handler(void)
@@ -450,21 +482,37 @@ static void ubd_handler(void)
 	struct ubd *ubd;
 	struct list_head *list, *next_ele;
 	unsigned long flags;
-	int n;
+	int n, rcount, i;
 
 	while(1){
-		n = os_read_file(thread_fd, &req,
-				 sizeof(struct io_thread_req *));
-		if(n != sizeof(req)){
-			if(n == -EAGAIN)
-				break;
-			printk(KERN_ERR "spurious interrupt in ubd_handler, "
-			       "err = %d\n", -n);
-			return;
+		n = do_safe_read(thread_fd, helper_reqs,
+				 sizeof(struct io_thread_req *) * MAX_SG, 
+				 sizeof(struct io_thread_req *)
+				 );
+
+		if(n == -EAGAIN){
+			break;
+		}
+		if(n < 0){
+			printk("io_thread - read failed, fd = %d, "
+			       "err = %d\n", thread_fd, -n);
+		} 
+
+		if(n % sizeof(struct io_thread_req *) != 0){
+			printk("kernel_ubd_io - invalid read, fd = %d, "
+			       "read = %d\n", thread_fd, n);
+			continue;
 		}
 
-		blk_end_request(req->req, 0, req->length);
-		kfree(req);
+		rcount = n / sizeof(struct io_thread_req *);
+		
+		for (i = 0; i < rcount; i++) {
+
+			req = * (helper_reqs + i);
+			blk_end_request(req->req, 0, req->length);
+			kfree(req);
+
+		}
 	}
 	reactivate_fd(thread_fd, UBD_IRQ);
 
@@ -1080,6 +1128,26 @@ late_initcall(ubd_init);
 static int __init ubd_driver_init(void){
 	unsigned long stack;
 	int err;
+	
+	kernel_reqs = kmalloc(MAX_SG * sizeof(struct io_thread_req *), GFP_KERNEL);
+	if (kernel_reqs == NULL) {
+		printk("Failed to allocate memory for req buffer\n");
+		return 0;
+	}
+
+
+	kernel_pfd = kmalloc(size_of_pollfd(), GFP_KERNEL);
+	if (kernel_pfd == NULL) {
+		printk("Failed to allocate memory for pollfd\n");
+		return 0;
+	}
+
+	helper_reqs = kmalloc(MAX_SG * sizeof(struct io_thread_req *), GFP_KERNEL);
+	if (helper_reqs == NULL) {
+		printk("Failed to allocate memory for req buffer\n");
+		return 0;
+	}
+
 
 	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
 	if(global_openflags.s){
@@ -1458,30 +1526,62 @@ static int io_count = 0;
 int io_thread(void *arg)
 {
 	struct io_thread_req *req;
-	int n;
+	unsigned char * buffer;
+	int n, rcount, i;
 
 	os_fix_helper_signals();
 
+	printk("Starting UBD helper thread\n");
+
 	while(1){
-		n = os_read_file(kernel_fd, &req,
-				 sizeof(struct io_thread_req *));
-		if(n != sizeof(struct io_thread_req *)){
-			if(n < 0)
-				printk("io_thread - read failed, fd = %d, "
-				       "err = %d\n", kernel_fd, -n);
-			else {
-				printk("io_thread - short read, fd = %d, "
-				       "length = %d\n", kernel_fd, n);
-			}
+		setup_ubd_pollfd(kernel_pfd, kernel_fd);
+		os_poll(kernel_pfd, 1, -1);
+		n = do_safe_read(kernel_fd, kernel_reqs,
+				 sizeof(struct io_thread_req *) * MAX_SG,
+				 sizeof(struct io_thread_req *) * MAX_SG
+				 );
+		if(n == -EAGAIN){
 			continue;
 		}
-		io_count++;
-		do_io(req);
-		n = os_write_file(kernel_fd, &req,
-				  sizeof(struct io_thread_req *));
-		if(n != sizeof(struct io_thread_req *))
+		if(n < 0){
+			printk("io_thread - read failed, fd = %d, "
+			       "err = %d\n", kernel_fd, -n);
+		} 
+
+		if(n % sizeof(struct io_thread_req *) != 0){
+			printk("io_thread - invalid read, fd = %d, "
+			       "read = %d\n", kernel_fd, n);
+			continue;
+		}
+
+		rcount = n / sizeof(struct io_thread_req *);
+		
+		for (i = 0; i < rcount; i++) {
+
+			io_count++;
+
+			req = * (kernel_reqs + i);
+
+			do_io(req);
+
+		}
+
+		buffer = (unsigned char *) kernel_reqs;
+
+		while (n > 0) {
+			i = os_write_file(kernel_fd, buffer, n);
+			if(i >= 0){
+				buffer = buffer + i;
+				n = n - i;
+			} else {
+				if(i != -EAGAIN)
+					break;
+			}
+		} 
+
+		if(n > 0)
 			printk("io_thread - write failed, fd = %d, err = %d\n",
-			       kernel_fd, -n);
+			       kernel_fd, -i);
 	}
 
 	return 0;
diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
index e376f9b..3abec4f 100644
--- a/arch/um/drivers/ubd_user.c
+++ b/arch/um/drivers/ubd_user.c
@@ -17,10 +17,22 @@
 #include <sys/param.h>
 #include <endian.h>
 #include <byteswap.h>
+#include <poll.h>
 
 #include "ubd.h"
 #include <os.h>
 
+void setup_ubd_pollfd(void * fds, int fd) {
+	struct pollfd * pfds = (struct pollfd *) fds;
+	pfds->fd = fd;
+	pfds->events = POLLIN | POLLPRI;
+	pfds->revents = 0;
+}
+
+int size_of_pollfd(void) {
+	return sizeof(struct pollfd);
+}
+
 int start_io_thread(unsigned long sp, int *fd_out)
 {
 	int pid, fds[2], err;
@@ -36,10 +48,15 @@ int start_io_thread(unsigned long sp, int *fd_out)
 
 	err = os_set_fd_block(*fd_out, 0);
 	if (err) {
-		printk("start_io_thread - failed to set nonblocking I/O.\n");
+		printk("start_io_thread - failed to set nonblocking I/O - kernel_fd.\n");
 		goto out_close;
 	}
 
+	err = os_set_fd_block(kernel_fd, 0);
+	if (err) {
+		printk("start_io_thread - failed to set nonblocking I/O - user_fd.\n");
+		goto out_close;
+	}
 	pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM, NULL);
 	if(pid < 0){
 		err = -errno;
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 7a04ddd..0b75efa 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -149,6 +149,7 @@ extern int os_file_size(const char *file, unsigned long long *size_out);
 extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset);
 extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset);
 extern int os_file_modtime(const char *file, unsigned long *modtime);
+extern int os_poll(void *fds, unsigned int nfds, int timeout);
 extern int os_pipe(int *fd, int stream, int close_on_exec);
 extern int os_set_fd_async(int fd);
 extern int os_clear_fd_async(int fd);
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 2db18cb..726b1e1 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -14,6 +14,7 @@
 #include <sys/stat.h>
 #include <sys/un.h>
 #include <sys/types.h>
+#include <poll.h>
 #include <os.h>
 
 static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
@@ -355,6 +356,17 @@ int os_file_modtime(const char *file, unsigned long *modtime)
 	return 0;
 }
 
+int os_poll(void *fds, unsigned int nfds, int timeout)
+{
+	int n;
+
+	CATCH_EINTR(n = poll((struct pollfd *) fds, nfds, timeout));
+	if (n < 0)
+		return -errno;
+
+	return n;
+}
+
 int os_set_exec_close(int fd)
 {
 	int err;
-- 
2.1.4


------------------------------------------------------------------------------
_______________________________________________
User-mode-linux-devel mailing list
User-mode-linux-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [uml-devel] [PATCH 2/2] Bulk IO Transaction support part 1
  2015-12-21 18:54 ` [uml-devel] [PATCH 2/2] Bulk IO Transaction support part 1 Anton Ivanov
@ 2015-12-21 19:04   ` Anton Ivanov
  2016-01-10 16:00     ` Richard Weinberger
  2016-01-12 21:25   ` James McMechan
  1 sibling, 1 reply; 9+ messages in thread
From: Anton Ivanov @ 2015-12-21 19:04 UTC (permalink / raw)
  To: user-mode-linux-devel; +Cc: Richard Weinberger

Hi list, hi Richard,

This rather primitive patchset pushes disk IO by ~ 15%.

dd to /dev/null on a host memory cached 16G sparse disk image with 
ubuntu on it goes from 512MB/s on my machine to 580MB/s.

Part 2 will be ring buffers and read/write poll loop in the io_thread as 
well as elimination of "partial read/write" workarounds. While it does 
not push disk benchmarks a lot it makes UML more responsive.

Part 3 will be merging adjacent IO transactions (by file location) via 
vector IO. This pushes trhoughput even further by quite a bit.

While I can try to submit all of that in one go, I'd rather submit it in 
chunks so it is easier to review even if this will result in patch churn 
(the whole do_safe_read malarkey will bite the bullet once part 2 is out).

A.

On 21/12/15 18:54, Anton Ivanov wrote:
> This patch adds support for merging notifications on the ubd
> notification file descriptor. Multiple transactions are processed
> at a time resulting in 10-15% virtual disk speed improvement.
>
> The mechanics are rather primitive - no ring buffers, primitive
> guaranteed flush and guaranteed to-record-size read.
>
> Signed-off-by: Anton Ivanov <aivanov@brocade.com>
> ---
>   arch/um/drivers/ubd.h       |   2 +
>   arch/um/drivers/ubd_kern.c  | 156 ++++++++++++++++++++++++++++++++++++--------
>   arch/um/drivers/ubd_user.c  |  19 +++++-
>   arch/um/include/shared/os.h |   1 +
>   arch/um/os-Linux/file.c     |  12 ++++
>   5 files changed, 161 insertions(+), 29 deletions(-)
>
> diff --git a/arch/um/drivers/ubd.h b/arch/um/drivers/ubd.h
> index 3b48cd2..2c5e6fd 100644
> --- a/arch/um/drivers/ubd.h
> +++ b/arch/um/drivers/ubd.h
> @@ -10,6 +10,8 @@
>   extern int start_io_thread(unsigned long sp, int *fds_out);
>   extern int io_thread(void *arg);
>   extern int kernel_fd;
> +extern void setup_ubd_pollfd(void * fds, int fd);
> +extern int size_of_pollfd(void);
>   
>   #endif
>   
> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
> index 39ba207..9f30c50 100644
> --- a/arch/um/drivers/ubd_kern.c
> +++ b/arch/um/drivers/ubd_kern.c
> @@ -58,6 +58,12 @@ struct io_thread_req {
>   	int error;
>   };
>   
> +static void * kernel_pfd = NULL;
> +
> +struct io_thread_req **kernel_reqs;
> +
> +struct io_thread_req **helper_reqs;
> +
>   static inline int ubd_test_bit(__u64 bit, unsigned char *data)
>   {
>   	__u64 n;
> @@ -442,6 +448,32 @@ static void do_ubd_request(struct request_queue * q);
>   static int thread_fd = -1;
>   static LIST_HEAD(restart);
>   
> +
> +static int do_safe_read(int fd, void * buffer, int max_size, int record_size){
> +	unsigned char * buf;
> +	int n, size;
> +
> +	size = 0;
> +	buf = (unsigned char *) buffer;
> +
> +	return os_read_file(fd, buf + size, max_size - size);
> +
> +	do {
> +		n = os_read_file(fd, buf, max_size - size);
> +		if ((size == 0) && (n == -EAGAIN))
> +			return n;
> +		if (n > 0)
> +			size = size + n;
> +		if ((n < 0) && (n != -EAGAIN))
> +			return n;
> +		if (size == max_size)
> +			break;
> +	} while ((size % record_size) != 0);
> +
> +	return size;
> +}
> +
> +
>   /* XXX - move this inside ubd_intr. */
>   /* Called without dev->lock held, and only in interrupt context. */
>   static void ubd_handler(void)
> @@ -450,21 +482,37 @@ static void ubd_handler(void)
>   	struct ubd *ubd;
>   	struct list_head *list, *next_ele;
>   	unsigned long flags;
> -	int n;
> +	int n, rcount, i;
>   
>   	while(1){
> -		n = os_read_file(thread_fd, &req,
> -				 sizeof(struct io_thread_req *));
> -		if(n != sizeof(req)){
> -			if(n == -EAGAIN)
> -				break;
> -			printk(KERN_ERR "spurious interrupt in ubd_handler, "
> -			       "err = %d\n", -n);
> -			return;
> +		n = do_safe_read(thread_fd, helper_reqs,
> +				 sizeof(struct io_thread_req *) * MAX_SG,
> +				 sizeof(struct io_thread_req *)
> +				 );
> +
> +		if(n == -EAGAIN){
> +			break;
> +		}
> +		if(n < 0){
> +			printk("io_thread - read failed, fd = %d, "
> +			       "err = %d\n", thread_fd, -n);
> +		}
> +
> +		if(n % sizeof(struct io_thread_req *) != 0){
> +			printk("kernel_ubd_io - invalid read, fd = %d, "
> +			       "read = %d\n", thread_fd, n);
> +			continue;
>   		}
>   
> -		blk_end_request(req->req, 0, req->length);
> -		kfree(req);
> +		rcount = n / sizeof(struct io_thread_req *);
> +		
> +		for (i = 0; i < rcount; i++) {
> +
> +			req = * (helper_reqs + i);
> +			blk_end_request(req->req, 0, req->length);
> +			kfree(req);
> +
> +		}
>   	}
>   	reactivate_fd(thread_fd, UBD_IRQ);
>   
> @@ -1080,6 +1128,26 @@ late_initcall(ubd_init);
>   static int __init ubd_driver_init(void){
>   	unsigned long stack;
>   	int err;
> +	
> +	kernel_reqs = kmalloc(MAX_SG * sizeof(struct io_thread_req *), GFP_KERNEL);
> +	if (kernel_reqs == NULL) {
> +		printk("Failed to allocate memory for req buffer\n");
> +		return 0;
> +	}
> +
> +
> +	kernel_pfd = kmalloc(size_of_pollfd(), GFP_KERNEL);
> +	if (kernel_pfd == NULL) {
> +		printk("Failed to allocate memory for pollfd\n");
> +		return 0;
> +	}
> +
> +	helper_reqs = kmalloc(MAX_SG * sizeof(struct io_thread_req *), GFP_KERNEL);
> +	if (helper_reqs == NULL) {
> +		printk("Failed to allocate memory for req buffer\n");
> +		return 0;
> +	}
> +
>   
>   	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
>   	if(global_openflags.s){
> @@ -1458,30 +1526,62 @@ static int io_count = 0;
>   int io_thread(void *arg)
>   {
>   	struct io_thread_req *req;
> -	int n;
> +	unsigned char * buffer;
> +	int n, rcount, i;
>   
>   	os_fix_helper_signals();
>   
> +	printk("Starting UBD helper thread\n");
> +
>   	while(1){
> -		n = os_read_file(kernel_fd, &req,
> -				 sizeof(struct io_thread_req *));
> -		if(n != sizeof(struct io_thread_req *)){
> -			if(n < 0)
> -				printk("io_thread - read failed, fd = %d, "
> -				       "err = %d\n", kernel_fd, -n);
> -			else {
> -				printk("io_thread - short read, fd = %d, "
> -				       "length = %d\n", kernel_fd, n);
> -			}
> +		setup_ubd_pollfd(kernel_pfd, kernel_fd);
> +		os_poll(kernel_pfd, 1, -1);
> +		n = do_safe_read(kernel_fd, kernel_reqs,
> +				 sizeof(struct io_thread_req *) * MAX_SG,
> +				 sizeof(struct io_thread_req *) * MAX_SG
> +				 );
> +		if(n == -EAGAIN){
>   			continue;
>   		}
> -		io_count++;
> -		do_io(req);
> -		n = os_write_file(kernel_fd, &req,
> -				  sizeof(struct io_thread_req *));
> -		if(n != sizeof(struct io_thread_req *))
> +		if(n < 0){
> +			printk("io_thread - read failed, fd = %d, "
> +			       "err = %d\n", kernel_fd, -n);
> +		}
> +
> +		if(n % sizeof(struct io_thread_req *) != 0){
> +			printk("io_thread - invalid read, fd = %d, "
> +			       "read = %d\n", kernel_fd, n);
> +			continue;
> +		}
> +
> +		rcount = n / sizeof(struct io_thread_req *);
> +		
> +		for (i = 0; i < rcount; i++) {
> +
> +			io_count++;
> +
> +			req = * (kernel_reqs + i);
> +
> +			do_io(req);
> +
> +		}
> +
> +		buffer = (unsigned char *) kernel_reqs;
> +
> +		while (n > 0) {
> +			i = os_write_file(kernel_fd, buffer, n);
> +			if(i >= 0){
> +				buffer = buffer + i;
> +				n = n - i;
> +			} else {
> +				if(i != -EAGAIN)
> +					break;
> +			}
> +		}
> +
> +		if(n > 0)
>   			printk("io_thread - write failed, fd = %d, err = %d\n",
> -			       kernel_fd, -n);
> +			       kernel_fd, -i);
>   	}
>   
>   	return 0;
> diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
> index e376f9b..3abec4f 100644
> --- a/arch/um/drivers/ubd_user.c
> +++ b/arch/um/drivers/ubd_user.c
> @@ -17,10 +17,22 @@
>   #include <sys/param.h>
>   #include <endian.h>
>   #include <byteswap.h>
> +#include <poll.h>
>   
>   #include "ubd.h"
>   #include <os.h>
>   
> +void setup_ubd_pollfd(void * fds, int fd) {
> +	struct pollfd * pfds = (struct pollfd *) fds;
> +	pfds->fd = fd;
> +	pfds->events = POLLIN | POLLPRI;
> +	pfds->revents = 0;
> +}
> +
> +int size_of_pollfd(void) {
> +	return sizeof(struct pollfd);
> +}
> +
>   int start_io_thread(unsigned long sp, int *fd_out)
>   {
>   	int pid, fds[2], err;
> @@ -36,10 +48,15 @@ int start_io_thread(unsigned long sp, int *fd_out)
>   
>   	err = os_set_fd_block(*fd_out, 0);
>   	if (err) {
> -		printk("start_io_thread - failed to set nonblocking I/O.\n");
> +		printk("start_io_thread - failed to set nonblocking I/O - kernel_fd.\n");
>   		goto out_close;
>   	}
>   
> +	err = os_set_fd_block(kernel_fd, 0);
> +	if (err) {
> +		printk("start_io_thread - failed to set nonblocking I/O - user_fd.\n");
> +		goto out_close;
> +	}
>   	pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM, NULL);
>   	if(pid < 0){
>   		err = -errno;
> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
> index 7a04ddd..0b75efa 100644
> --- a/arch/um/include/shared/os.h
> +++ b/arch/um/include/shared/os.h
> @@ -149,6 +149,7 @@ extern int os_file_size(const char *file, unsigned long long *size_out);
>   extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset);
>   extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset);
>   extern int os_file_modtime(const char *file, unsigned long *modtime);
> +extern int os_poll(void *fds, unsigned int nfds, int timeout);
>   extern int os_pipe(int *fd, int stream, int close_on_exec);
>   extern int os_set_fd_async(int fd);
>   extern int os_clear_fd_async(int fd);
> diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
> index 2db18cb..726b1e1 100644
> --- a/arch/um/os-Linux/file.c
> +++ b/arch/um/os-Linux/file.c
> @@ -14,6 +14,7 @@
>   #include <sys/stat.h>
>   #include <sys/un.h>
>   #include <sys/types.h>
> +#include <poll.h>
>   #include <os.h>
>   
>   static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
> @@ -355,6 +356,17 @@ int os_file_modtime(const char *file, unsigned long *modtime)
>   	return 0;
>   }
>   
> +int os_poll(void *fds, unsigned int nfds, int timeout)
> +{
> +	int n;
> +
> +	CATCH_EINTR(n = poll((struct pollfd *) fds, nfds, timeout));
> +	if (n < 0)
> +		return -errno;
> +
> +	return n;
> +}
> +
>   int os_set_exec_close(int fd)
>   {
>   	int err;


------------------------------------------------------------------------------
_______________________________________________
User-mode-linux-devel mailing list
User-mode-linux-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [uml-devel] [PATCH 1/2] Update UBD to use pread/pwrite family of functions
  2015-12-21 18:54 [uml-devel] [PATCH 1/2] Update UBD to use pread/pwrite family of functions Anton Ivanov
  2015-12-21 18:54 ` [uml-devel] [PATCH 2/2] Bulk IO Transaction support part 1 Anton Ivanov
@ 2016-01-10 15:57 ` Richard Weinberger
  1 sibling, 0 replies; 9+ messages in thread
From: Richard Weinberger @ 2016-01-10 15:57 UTC (permalink / raw)
  To: Anton Ivanov; +Cc: Richard Weinberger, user-mode-linux-devel

On Mon, Dec 21, 2015 at 7:54 PM, Anton Ivanov <aivanov@brocade.com> wrote:
> This decreases the number of syscalls per read/write by half.
>
> Signed-off-by: Anton Ivanov <aivanov@brocade.com>

Applied!

-- 
Thanks,
//richard

------------------------------------------------------------------------------
Site24x7 APM Insight: Get Deep Visibility into Application Performance
APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
Monitor end-to-end web transactions and take corrective actions now
Troubleshoot faster and improve end-user experience. Signup Now!
http://pubads.g.doubleclick.net/gampad/clk?id=267308311&iu=/4140
_______________________________________________
User-mode-linux-devel mailing list
User-mode-linux-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [uml-devel] [PATCH 2/2] Bulk IO Transaction support part 1
  2015-12-21 19:04   ` Anton Ivanov
@ 2016-01-10 16:00     ` Richard Weinberger
  2016-01-10 16:36       ` Anton Ivanov
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Weinberger @ 2016-01-10 16:00 UTC (permalink / raw)
  To: Anton Ivanov; +Cc: Richard Weinberger, user-mode-linux-devel

On Mon, Dec 21, 2015 at 8:04 PM, Anton Ivanov
<anton.ivanov@kot-begemot.co.uk> wrote:
> Hi list, hi Richard,
>
> This rather primitive patchset pushes disk IO by ~ 15%.
>
> dd to /dev/null on a host memory cached 16G sparse disk image with
> ubuntu on it goes from 512MB/s on my machine to 580MB/s.
>
> Part 2 will be ring buffers and read/write poll loop in the io_thread as
> well as elimination of "partial read/write" workarounds. While it does
> not push disk benchmarks a lot it makes UML more responsive.
>
> Part 3 will be merging adjacent IO transactions (by file location) via
> vector IO. This pushes trhoughput even further by quite a bit.
>
> While I can try to submit all of that in one go, I'd rather submit it in
> chunks so it is easier to review even if this will result in patch churn
> (the whole do_safe_read malarkey will bite the bullet once part 2 is out).

Is this already stable enough for this merge window?
At least checkpatch.pl dislikes part 1. :(

-- 
Thanks,
//richard

------------------------------------------------------------------------------
Site24x7 APM Insight: Get Deep Visibility into Application Performance
APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
Monitor end-to-end web transactions and take corrective actions now
Troubleshoot faster and improve end-user experience. Signup Now!
http://pubads.g.doubleclick.net/gampad/clk?id=267308311&iu=/4140
_______________________________________________
User-mode-linux-devel mailing list
User-mode-linux-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [uml-devel] [PATCH 2/2] Bulk IO Transaction support part 1
  2016-01-10 16:00     ` Richard Weinberger
@ 2016-01-10 16:36       ` Anton Ivanov
  2016-01-10 21:51         ` Richard Weinberger
  0 siblings, 1 reply; 9+ messages in thread
From: Anton Ivanov @ 2016-01-10 16:36 UTC (permalink / raw)
  To: Richard Weinberger; +Cc: Richard Weinberger, user-mode-linux-devel

On 10/01/16 16:00, Richard Weinberger wrote:
> On Mon, Dec 21, 2015 at 8:04 PM, Anton Ivanov
> <anton.ivanov@kot-begemot.co.uk> wrote:
>> Hi list, hi Richard,
>>
>> This rather primitive patchset pushes disk IO by ~ 15%.
>>
>> dd to /dev/null on a host memory cached 16G sparse disk image with
>> ubuntu on it goes from 512MB/s on my machine to 580MB/s.
>>
>> Part 2 will be ring buffers and read/write poll loop in the io_thread as
>> well as elimination of "partial read/write" workarounds. While it does
>> not push disk benchmarks a lot it makes UML more responsive.
>>
>> Part 3 will be merging adjacent IO transactions (by file location) via
>> vector IO. This pushes trhoughput even further by quite a bit.
>>
>> While I can try to submit all of that in one go, I'd rather submit it in
>> chunks so it is easier to review even if this will result in patch churn
>> (the whole do_safe_read malarkey will bite the bullet once part 2 is out).
> Is this already stable enough for this merge window?
> At least checkpatch.pl dislikes part 1. :(
>
You can leave this one out. I will be working further on this to squeeze
~ 10% more and introduce vector IO.

A.

------------------------------------------------------------------------------
Site24x7 APM Insight: Get Deep Visibility into Application Performance
APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
Monitor end-to-end web transactions and take corrective actions now
Troubleshoot faster and improve end-user experience. Signup Now!
http://pubads.g.doubleclick.net/gampad/clk?id=267308311&iu=/4140
_______________________________________________
User-mode-linux-devel mailing list
User-mode-linux-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [uml-devel] [PATCH 2/2] Bulk IO Transaction support part 1
  2016-01-10 16:36       ` Anton Ivanov
@ 2016-01-10 21:51         ` Richard Weinberger
  0 siblings, 0 replies; 9+ messages in thread
From: Richard Weinberger @ 2016-01-10 21:51 UTC (permalink / raw)
  To: Anton Ivanov, Richard Weinberger; +Cc: user-mode-linux-devel

Am 10.01.2016 um 17:36 schrieb Anton Ivanov:
> On 10/01/16 16:00, Richard Weinberger wrote:
>> On Mon, Dec 21, 2015 at 8:04 PM, Anton Ivanov
>> <anton.ivanov@kot-begemot.co.uk> wrote:
>>> Hi list, hi Richard,
>>>
>>> This rather primitive patchset pushes disk IO by ~ 15%.
>>>
>>> dd to /dev/null on a host memory cached 16G sparse disk image with
>>> ubuntu on it goes from 512MB/s on my machine to 580MB/s.
>>>
>>> Part 2 will be ring buffers and read/write poll loop in the io_thread as
>>> well as elimination of "partial read/write" workarounds. While it does
>>> not push disk benchmarks a lot it makes UML more responsive.
>>>
>>> Part 3 will be merging adjacent IO transactions (by file location) via
>>> vector IO. This pushes trhoughput even further by quite a bit.
>>>
>>> While I can try to submit all of that in one go, I'd rather submit it in
>>> chunks so it is easier to review even if this will result in patch churn
>>> (the whole do_safe_read malarkey will bite the bullet once part 2 is out).
>> Is this already stable enough for this merge window?
>> At least checkpatch.pl dislikes part 1. :(
>>
> You can leave this one out. I will be working further on this to squeeze
> ~ 10% more and introduce vector IO.

Ok!

Thanks,
//richard

------------------------------------------------------------------------------
Site24x7 APM Insight: Get Deep Visibility into Application Performance
APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
Monitor end-to-end web transactions and take corrective actions now
Troubleshoot faster and improve end-user experience. Signup Now!
http://pubads.g.doubleclick.net/gampad/clk?id=267308311&iu=/4140
_______________________________________________
User-mode-linux-devel mailing list
User-mode-linux-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [uml-devel] [PATCH 2/2] Bulk IO Transaction support part 1
  2015-12-21 18:54 ` [uml-devel] [PATCH 2/2] Bulk IO Transaction support part 1 Anton Ivanov
  2015-12-21 19:04   ` Anton Ivanov
@ 2016-01-12 21:25   ` James McMechan
  2016-01-12 21:39     ` Anton Ivanov
  1 sibling, 1 reply; 9+ messages in thread
From: James McMechan @ 2016-01-12 21:25 UTC (permalink / raw)
  To: Anton Ivanov, uml devel; +Cc: richard

I am a bit late getting back to you  on this but I do have a few comments
The safe_read seems out of tune currently.
Or maybe I am just totally confused :)

> This patch adds support for merging notifications on the ubd
> notification file descriptor. Multiple transactions are processed
> at a time resulting in 10-15% virtual disk speed improvement.
>
> The mechanics are rather primitive - no ring buffers, primitive
> guaranteed flush and guaranteed to-record-size read.
>
> Signed-off-by: Anton Ivanov <aivanov@brocade.com>
> ---
> arch/um/drivers/ubd.h | 2 +
> arch/um/drivers/ubd_kern.c | 156 ++++++++++++++++++++++++++++++++++++--------
> arch/um/drivers/ubd_user.c | 19 +++++-
> arch/um/include/shared/os.h | 1 +
> arch/um/os-Linux/file.c | 12 ++++
> 5 files changed, 161 insertions(+), 29 deletions(-)
>
> diff --git a/arch/um/drivers/ubd.h b/arch/um/drivers/ubd.h
> index 3b48cd2..2c5e6fd 100644
> --- a/arch/um/drivers/ubd.h
> +++ b/arch/um/drivers/ubd.h
> @@ -10,6 +10,8 @@
> extern int start_io_thread(unsigned long sp, int *fds_out);
> extern int io_thread(void *arg);
> extern int kernel_fd;
> +extern void setup_ubd_pollfd(void * fds, int fd);
> +extern int size_of_pollfd(void);
>
> #endif
>
> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
> index 39ba207..9f30c50 100644
> --- a/arch/um/drivers/ubd_kern.c
> +++ b/arch/um/drivers/ubd_kern.c
> @@ -58,6 +58,12 @@ struct io_thread_req {
> int error;
> };
>
> +static void * kernel_pfd = NULL;

These are pointer to pointers?
so this will be an array of 64 pointers kmalloced?
> +struct io_thread_req **kernel_reqs;
> +
> +struct io_thread_req **helper_reqs;
> +
> static inline int ubd_test_bit(__u64 bit, unsigned char *data)
> {
> __u64 n;
> @@ -442,6 +448,32 @@ static void do_ubd_request(struct request_queue * q);
> static int thread_fd = -1;
> static LIST_HEAD(restart);
>

This would be clearer if it was struct io_thread_req **buffer
Ok this is reading MAX_SG pointers rather than the previous single pointer
> +static int do_safe_read(int fd, void * buffer, int max_size, int record_size){
> + unsigned char * buf;
> + int n, size;
> +
> + size = 0;
> + buf = (unsigned char *) buffer;
> +
> + return os_read_file(fd, buf + size, max_size - size);

Your function ends here every thing else is dead code?

> +
> + do {
> + n = os_read_file(fd, buf, max_size - size);

Do you want to return on only no data or a set of complete pointers?
e.g. size % sizeof(void *) == 0 maybe? then return size since we may have several already.
// if we have not read anything yet we can return safely
> + if ((size == 0) && (n == -EAGAIN))
> + return n;

// update how much we have read so far
> + if (n> 0)
> + size = size + n;

A bit of shuffeling here might make it clearer and 
// discard whatever we have read in so far if we were not /going to block/ but errored
> + if ((n < 0) && (n != -EAGAIN))
> + return n;

max_size should be a multiple of record size so the loop would exit anyway?
It might be better to fold it into the while clause below.
// when we have read the amount requested stop
> + if (size == max_size)
> + break;

// stopping if we hit exactly record_size? so N full pointers have been read.
> + } while ((size % record_size) != 0);

// return how much we actually read
> + return size;
> +}
> +
> +
> /* XXX - move this inside ubd_intr. */
> /* Called without dev->lock held, and only in interrupt context. */
> static void ubd_handler(void)
> @@ -450,21 +482,37 @@ static void ubd_handler(void)
> struct ubd *ubd;
> struct list_head *list, *next_ele;
> unsigned long flags;
> - int n;
> + int n, rcount, i;
>
> while(1){
> - n = os_read_file(thread_fd, &req,
> - sizeof(struct io_thread_req *));
> - if(n != sizeof(req)){
> - if(n == -EAGAIN)
> - break;
> - printk(KERN_ERR "spurious interrupt in ubd_handler, "
> - "err = %d\n", -n);
> - return;
> + n = do_safe_read(thread_fd, helper_reqs,
> + sizeof(struct io_thread_req *) * MAX_SG,
> + sizeof(struct io_thread_req *)
> + );


// no data pending so drop out of the loop
> + if(n == -EAGAIN){
> + break;
> + }
> + if(n < 0){
> + printk("io_thread - read failed, fd = %d, "
> + "err = %d\n", thread_fd, -n);
> + }

// if we have a partial pointer complain
> + if(n % sizeof(struct io_thread_req *) != 0){
> + printk("kernel_ubd_io - invalid read, fd = %d, "
> + "read = %d\n", thread_fd, n);
> + continue;
> }
>
> - blk_end_request(req->req, 0, req->length);
> - kfree(req);
> + rcount = n / sizeof(struct io_thread_req *);

// step through each io_thread_req * in order
> + for (i = 0; i < rcount; i++) {

Perhaps *helper_reqs[i] might be clearer dereferencing pointers to pointers
is often hard to understand.
> + req = * (helper_reqs + i);
> + blk_end_request(req->req, 0, req->length);
> + kfree(req);
> +
> + }
> }
> reactivate_fd(thread_fd, UBD_IRQ);
>
> @@ -1080,6 +1128,26 @@ late_initcall(ubd_init);
> static int __init ubd_driver_init(void){
> unsigned long stack;
> int err;
> +
> + kernel_reqs = kmalloc(MAX_SG * sizeof(struct io_thread_req *), GFP_KERNEL);
> + if (kernel_reqs == NULL) {
> + printk("Failed to allocate memory for req buffer\n");
> + return 0;
> + }
> +
> +
> + kernel_pfd = kmalloc(size_of_pollfd(), GFP_KERNEL);
> + if (kernel_pfd == NULL) {
> + printk("Failed to allocate memory for pollfd\n");
> + return 0;
> + }
> +
> + helper_reqs = kmalloc(MAX_SG * sizeof(struct io_thread_req *), GFP_KERNEL);
> + if (helper_reqs == NULL) {

Failed to allocate memory for array of pointers to req buffers
> + printk("Failed to allocate memory for req buffer\n");
> + return 0;
> + }
> +
>
> /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
> if(global_openflags.s){
> @@ -1458,30 +1526,62 @@ static int io_count = 0;
> int io_thread(void *arg)
> {
> struct io_thread_req *req;
> - int n;
> + unsigned char * buffer;
> + int n, rcount, i;
>
> os_fix_helper_signals();
>
> + printk("Starting UBD helper thread\n");
> +
> while(1){
> - n = os_read_file(kernel_fd, &req,
> - sizeof(struct io_thread_req *));
> - if(n != sizeof(struct io_thread_req *)){
> - if(n < 0)

The old code was missing the test for EAGAIN
it was not too likely to fail a 4 byte read or 8 byte on 64bit
but still it should have had it just in case.
> - printk("io_thread - read failed, fd = %d, "
> - "err = %d\n", kernel_fd, -n);
> - else {
> - printk("io_thread - short read, fd = %d, "
> - "length = %d\n", kernel_fd, n);
> - }

Ok, but why do we want to sit in poll here rather than read?
we are nonblocking on purpose right? why build a blocking non-blocking read?
> + setup_ubd_pollfd(kernel_pfd, kernel_fd);
> + os_poll(kernel_pfd, 1, -1);
> + n = do_safe_read(kernel_fd, kernel_reqs,
> + sizeof(struct io_thread_req *) * MAX_SG,
> + sizeof(struct io_thread_req *) * MAX_SG
> + );
> + if(n == -EAGAIN){

it is annoying when diff spilts things so oddly
this closes the if n != sizeof in the old code?
I guess it was supposed to read the rest of the 
req but if would read it at the beginning of the buffer?
> continue;
> }
> - io_count++;
> - do_io(req);
> - n = os_write_file(kernel_fd, &req,
> - sizeof(struct io_thread_req *));
> - if(n != sizeof(struct io_thread_req *))
> + if(n < 0){
> + printk("io_thread - read failed, fd = %d, "
> + "err = %d\n", kernel_fd, -n);
> + }

Do you get a gain here? to try to read more than one pointer at a time
also you are testing for multiple of the size of the pointer?
> + if(n % sizeof(struct io_thread_req *) != 0){
> + printk("io_thread - invalid read, fd = %d, "
> + "read = %d\n", kernel_fd, n);
> + continue;
> + }
> +
> + rcount = n / sizeof(struct io_thread_req *);
> +
> + for (i = 0; i < rcount; i++) {
> +
> + io_count++;
> +
> + req = * (kernel_reqs + i);
> +
> + do_io(req);
> +
> + }
> +
> + buffer = (unsigned char *) kernel_reqs;
> +
> + while (n> 0) {
> + i = os_write_file(kernel_fd, buffer, n);
> + if(i>= 0){
> + buffer = buffer + i;
> + n = n - i;
> + } else {

// We would block try again later
> + if(i != -EAGAIN)
> + break;
> + }
> + }
> +
> + if(n> 0)
> printk("io_thread - write failed, fd = %d, err = %d\n",
> - kernel_fd, -n);
> + kernel_fd, -i);
> }
>
> return 0;
> diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
> index e376f9b..3abec4f 100644
> --- a/arch/um/drivers/ubd_user.c
> +++ b/arch/um/drivers/ubd_user.c
> @@ -17,10 +17,22 @@
> #include <sys/param.h>
> #include <endian.h>
> #include <byteswap.h>
> +#include <poll.h>
>
> #include "ubd.h"
> #include <os.h>
>
> +void setup_ubd_pollfd(void * fds, int fd) {
> + struct pollfd * pfds = (struct pollfd *) fds;
> + pfds->fd = fd;
> + pfds->events = POLLIN | POLLPRI;
> + pfds->revents = 0;
> +}
> +
> +int size_of_pollfd(void) {
> + return sizeof(struct pollfd);
> +}
> +
> int start_io_thread(unsigned long sp, int *fd_out)
> {
> int pid, fds[2], err;
> @@ -36,10 +48,15 @@ int start_io_thread(unsigned long sp, int *fd_out)
>
> err = os_set_fd_block(*fd_out, 0);
> if (err) {
> - printk("start_io_thread - failed to set nonblocking I/O.\n");
> + printk("start_io_thread - failed to set nonblocking I/O - kernel_fd.\n");
> goto out_close;
> }
>
> + err = os_set_fd_block(kernel_fd, 0);
> + if (err) {
> + printk("start_io_thread - failed to set nonblocking I/O - user_fd.\n");
> + goto out_close;
> + }
> pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM, NULL);
> if(pid < 0){
> err = -errno;
> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
> index 7a04ddd..0b75efa 100644
> --- a/arch/um/include/shared/os.h
> +++ b/arch/um/include/shared/os.h
> @@ -149,6 +149,7 @@ extern int os_file_size(const char *file, unsigned long long *size_out);
> extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset);
> extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset);
> extern int os_file_modtime(const char *file, unsigned long *modtime);
> +extern int os_poll(void *fds, unsigned int nfds, int timeout);
> extern int os_pipe(int *fd, int stream, int close_on_exec);
> extern int os_set_fd_async(int fd);
> extern int os_clear_fd_async(int fd);
> diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
> index 2db18cb..726b1e1 100644
> --- a/arch/um/os-Linux/file.c
> +++ b/arch/um/os-Linux/file.c
> @@ -14,6 +14,7 @@
> #include <sys/stat.h>
> #include <sys/un.h>
> #include <sys/types.h>
> +#include <poll.h>
> #include <os.h>
>
> static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
> @@ -355,6 +356,17 @@ int os_file_modtime(const char *file, unsigned long *modtime)
> return 0;
> }
>
> +int os_poll(void *fds, unsigned int nfds, int timeout)
> +{
> + int n;
> +
> + CATCH_EINTR(n = poll((struct pollfd *) fds, nfds, timeout));
> + if (n < 0)
> + return -errno;
> +
> + return n;
> +}
> +
> int os_set_exec_close(int fd)
> {
> int err;

 		 	   		  
------------------------------------------------------------------------------
Site24x7 APM Insight: Get Deep Visibility into Application Performance
APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
Monitor end-to-end web transactions and take corrective actions now
Troubleshoot faster and improve end-user experience. Signup Now!
http://pubads.g.doubleclick.net/gampad/clk?id=267308311&iu=/4140
_______________________________________________
User-mode-linux-devel mailing list
User-mode-linux-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [uml-devel] [PATCH 2/2] Bulk IO Transaction support part 1
  2016-01-12 21:25   ` James McMechan
@ 2016-01-12 21:39     ` Anton Ivanov
  0 siblings, 0 replies; 9+ messages in thread
From: Anton Ivanov @ 2016-01-12 21:39 UTC (permalink / raw)
  To: user-mode-linux-devel

On 12/01/16 21:25, James McMechan wrote:
> I am a bit late getting back to you  on this but I do have a few comments
> The safe_read seems out of tune currently.
> Or maybe I am just totally confused :)

No, you are not confused, I pushed the send button too early.

It is half-done, I was toying with the idea of doing it step by step 
instead of submitting a huge patch which includes all of the ring buffer 
and vector IO management.

I will do it properly and submit a proper one with ring buffers, etc 
(and no need to safe_read whatsoever).

I will also take whatever is applicable out of your feedback onboard.

A.

>
>> This patch adds support for merging notifications on the ubd
>> notification file descriptor. Multiple transactions are processed
>> at a time resulting in 10-15% virtual disk speed improvement.
>>
>> The mechanics are rather primitive - no ring buffers, primitive
>> guaranteed flush and guaranteed to-record-size read.
>>
>> Signed-off-by: Anton Ivanov <aivanov@brocade.com>
>> ---
>> arch/um/drivers/ubd.h | 2 +
>> arch/um/drivers/ubd_kern.c | 156 ++++++++++++++++++++++++++++++++++++--------
>> arch/um/drivers/ubd_user.c | 19 +++++-
>> arch/um/include/shared/os.h | 1 +
>> arch/um/os-Linux/file.c | 12 ++++
>> 5 files changed, 161 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/um/drivers/ubd.h b/arch/um/drivers/ubd.h
>> index 3b48cd2..2c5e6fd 100644
>> --- a/arch/um/drivers/ubd.h
>> +++ b/arch/um/drivers/ubd.h
>> @@ -10,6 +10,8 @@
>> extern int start_io_thread(unsigned long sp, int *fds_out);
>> extern int io_thread(void *arg);
>> extern int kernel_fd;
>> +extern void setup_ubd_pollfd(void * fds, int fd);
>> +extern int size_of_pollfd(void);
>>
>> #endif
>>
>> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
>> index 39ba207..9f30c50 100644
>> --- a/arch/um/drivers/ubd_kern.c
>> +++ b/arch/um/drivers/ubd_kern.c
>> @@ -58,6 +58,12 @@ struct io_thread_req {
>> int error;
>> };
>>
>> +static void * kernel_pfd = NULL;
> These are pointer to pointers?
> so this will be an array of 64 pointers kmalloced?
>> +struct io_thread_req **kernel_reqs;
>> +
>> +struct io_thread_req **helper_reqs;
>> +
>> static inline int ubd_test_bit(__u64 bit, unsigned char *data)
>> {
>> __u64 n;
>> @@ -442,6 +448,32 @@ static void do_ubd_request(struct request_queue * q);
>> static int thread_fd = -1;
>> static LIST_HEAD(restart);
>>
> This would be clearer if it was struct io_thread_req **buffer
> Ok this is reading MAX_SG pointers rather than the previous single pointer
>> +static int do_safe_read(int fd, void * buffer, int max_size, int record_size){
>> + unsigned char * buf;
>> + int n, size;
>> +
>> + size = 0;
>> + buf = (unsigned char *) buffer;
>> +
>> + return os_read_file(fd, buf + size, max_size - size);
> Your function ends here every thing else is dead code?
>
>> +
>> + do {
>> + n = os_read_file(fd, buf, max_size - size);
> Do you want to return on only no data or a set of complete pointers?
> e.g. size % sizeof(void *) == 0 maybe? then return size since we may have several already.
> // if we have not read anything yet we can return safely
>> + if ((size == 0) && (n == -EAGAIN))
>> + return n;
> // update how much we have read so far
>> + if (n> 0)
>> + size = size + n;
> A bit of shuffeling here might make it clearer and
> // discard whatever we have read in so far if we were not /going to block/ but errored
>> + if ((n < 0) && (n != -EAGAIN))
>> + return n;
> max_size should be a multiple of record size so the loop would exit anyway?
> It might be better to fold it into the while clause below.
> // when we have read the amount requested stop
>> + if (size == max_size)
>> + break;
> // stopping if we hit exactly record_size? so N full pointers have been read.
>> + } while ((size % record_size) != 0);
> // return how much we actually read
>> + return size;
>> +}
>> +
>> +
>> /* XXX - move this inside ubd_intr. */
>> /* Called without dev->lock held, and only in interrupt context. */
>> static void ubd_handler(void)
>> @@ -450,21 +482,37 @@ static void ubd_handler(void)
>> struct ubd *ubd;
>> struct list_head *list, *next_ele;
>> unsigned long flags;
>> - int n;
>> + int n, rcount, i;
>>
>> while(1){
>> - n = os_read_file(thread_fd, &req,
>> - sizeof(struct io_thread_req *));
>> - if(n != sizeof(req)){
>> - if(n == -EAGAIN)
>> - break;
>> - printk(KERN_ERR "spurious interrupt in ubd_handler, "
>> - "err = %d\n", -n);
>> - return;
>> + n = do_safe_read(thread_fd, helper_reqs,
>> + sizeof(struct io_thread_req *) * MAX_SG,
>> + sizeof(struct io_thread_req *)
>> + );
>
> // no data pending so drop out of the loop
>> + if(n == -EAGAIN){
>> + break;
>> + }
>> + if(n < 0){
>> + printk("io_thread - read failed, fd = %d, "
>> + "err = %d\n", thread_fd, -n);
>> + }
> // if we have a partial pointer complain
>> + if(n % sizeof(struct io_thread_req *) != 0){
>> + printk("kernel_ubd_io - invalid read, fd = %d, "
>> + "read = %d\n", thread_fd, n);
>> + continue;
>> }
>>
>> - blk_end_request(req->req, 0, req->length);
>> - kfree(req);
>> + rcount = n / sizeof(struct io_thread_req *);
> // step through each io_thread_req * in order
>> + for (i = 0; i < rcount; i++) {
> Perhaps *helper_reqs[i] might be clearer dereferencing pointers to pointers
> is often hard to understand.
>> + req = * (helper_reqs + i);
>> + blk_end_request(req->req, 0, req->length);
>> + kfree(req);
>> +
>> + }
>> }
>> reactivate_fd(thread_fd, UBD_IRQ);
>>
>> @@ -1080,6 +1128,26 @@ late_initcall(ubd_init);
>> static int __init ubd_driver_init(void){
>> unsigned long stack;
>> int err;
>> +
>> + kernel_reqs = kmalloc(MAX_SG * sizeof(struct io_thread_req *), GFP_KERNEL);
>> + if (kernel_reqs == NULL) {
>> + printk("Failed to allocate memory for req buffer\n");
>> + return 0;
>> + }
>> +
>> +
>> + kernel_pfd = kmalloc(size_of_pollfd(), GFP_KERNEL);
>> + if (kernel_pfd == NULL) {
>> + printk("Failed to allocate memory for pollfd\n");
>> + return 0;
>> + }
>> +
>> + helper_reqs = kmalloc(MAX_SG * sizeof(struct io_thread_req *), GFP_KERNEL);
>> + if (helper_reqs == NULL) {
> Failed to allocate memory for array of pointers to req buffers
>> + printk("Failed to allocate memory for req buffer\n");
>> + return 0;
>> + }
>> +
>>
>> /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
>> if(global_openflags.s){
>> @@ -1458,30 +1526,62 @@ static int io_count = 0;
>> int io_thread(void *arg)
>> {
>> struct io_thread_req *req;
>> - int n;
>> + unsigned char * buffer;
>> + int n, rcount, i;
>>
>> os_fix_helper_signals();
>>
>> + printk("Starting UBD helper thread\n");
>> +
>> while(1){
>> - n = os_read_file(kernel_fd, &req,
>> - sizeof(struct io_thread_req *));
>> - if(n != sizeof(struct io_thread_req *)){
>> - if(n < 0)
> The old code was missing the test for EAGAIN
> it was not too likely to fail a 4 byte read or 8 byte on 64bit
> but still it should have had it just in case.
>> - printk("io_thread - read failed, fd = %d, "
>> - "err = %d\n", kernel_fd, -n);
>> - else {
>> - printk("io_thread - short read, fd = %d, "
>> - "length = %d\n", kernel_fd, n);
>> - }
> Ok, but why do we want to sit in poll here rather than read?
> we are nonblocking on purpose right? why build a blocking non-blocking read?
>> + setup_ubd_pollfd(kernel_pfd, kernel_fd);
>> + os_poll(kernel_pfd, 1, -1);
>> + n = do_safe_read(kernel_fd, kernel_reqs,
>> + sizeof(struct io_thread_req *) * MAX_SG,
>> + sizeof(struct io_thread_req *) * MAX_SG
>> + );
>> + if(n == -EAGAIN){
> it is annoying when diff spilts things so oddly
> this closes the if n != sizeof in the old code?
> I guess it was supposed to read the rest of the
> req but if would read it at the beginning of the buffer?
>> continue;
>> }
>> - io_count++;
>> - do_io(req);
>> - n = os_write_file(kernel_fd, &req,
>> - sizeof(struct io_thread_req *));
>> - if(n != sizeof(struct io_thread_req *))
>> + if(n < 0){
>> + printk("io_thread - read failed, fd = %d, "
>> + "err = %d\n", kernel_fd, -n);
>> + }
> Do you get a gain here? to try to read more than one pointer at a time
> also you are testing for multiple of the size of the pointer?
>> + if(n % sizeof(struct io_thread_req *) != 0){
>> + printk("io_thread - invalid read, fd = %d, "
>> + "read = %d\n", kernel_fd, n);
>> + continue;
>> + }
>> +
>> + rcount = n / sizeof(struct io_thread_req *);
>> +
>> + for (i = 0; i < rcount; i++) {
>> +
>> + io_count++;
>> +
>> + req = * (kernel_reqs + i);
>> +
>> + do_io(req);
>> +
>> + }
>> +
>> + buffer = (unsigned char *) kernel_reqs;
>> +
>> + while (n> 0) {
>> + i = os_write_file(kernel_fd, buffer, n);
>> + if(i>= 0){
>> + buffer = buffer + i;
>> + n = n - i;
>> + } else {
> // We would block try again later
>> + if(i != -EAGAIN)
>> + break;
>> + }
>> + }
>> +
>> + if(n> 0)
>> printk("io_thread - write failed, fd = %d, err = %d\n",
>> - kernel_fd, -n);
>> + kernel_fd, -i);
>> }
>>
>> return 0;
>> diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
>> index e376f9b..3abec4f 100644
>> --- a/arch/um/drivers/ubd_user.c
>> +++ b/arch/um/drivers/ubd_user.c
>> @@ -17,10 +17,22 @@
>> #include <sys/param.h>
>> #include <endian.h>
>> #include <byteswap.h>
>> +#include <poll.h>
>>
>> #include "ubd.h"
>> #include <os.h>
>>
>> +void setup_ubd_pollfd(void * fds, int fd) {
>> + struct pollfd * pfds = (struct pollfd *) fds;
>> + pfds->fd = fd;
>> + pfds->events = POLLIN | POLLPRI;
>> + pfds->revents = 0;
>> +}
>> +
>> +int size_of_pollfd(void) {
>> + return sizeof(struct pollfd);
>> +}
>> +
>> int start_io_thread(unsigned long sp, int *fd_out)
>> {
>> int pid, fds[2], err;
>> @@ -36,10 +48,15 @@ int start_io_thread(unsigned long sp, int *fd_out)
>>
>> err = os_set_fd_block(*fd_out, 0);
>> if (err) {
>> - printk("start_io_thread - failed to set nonblocking I/O.\n");
>> + printk("start_io_thread - failed to set nonblocking I/O - kernel_fd.\n");
>> goto out_close;
>> }
>>
>> + err = os_set_fd_block(kernel_fd, 0);
>> + if (err) {
>> + printk("start_io_thread - failed to set nonblocking I/O - user_fd.\n");
>> + goto out_close;
>> + }
>> pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM, NULL);
>> if(pid < 0){
>> err = -errno;
>> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
>> index 7a04ddd..0b75efa 100644
>> --- a/arch/um/include/shared/os.h
>> +++ b/arch/um/include/shared/os.h
>> @@ -149,6 +149,7 @@ extern int os_file_size(const char *file, unsigned long long *size_out);
>> extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset);
>> extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset);
>> extern int os_file_modtime(const char *file, unsigned long *modtime);
>> +extern int os_poll(void *fds, unsigned int nfds, int timeout);
>> extern int os_pipe(int *fd, int stream, int close_on_exec);
>> extern int os_set_fd_async(int fd);
>> extern int os_clear_fd_async(int fd);
>> diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
>> index 2db18cb..726b1e1 100644
>> --- a/arch/um/os-Linux/file.c
>> +++ b/arch/um/os-Linux/file.c
>> @@ -14,6 +14,7 @@
>> #include <sys/stat.h>
>> #include <sys/un.h>
>> #include <sys/types.h>
>> +#include <poll.h>
>> #include <os.h>
>>
>> static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
>> @@ -355,6 +356,17 @@ int os_file_modtime(const char *file, unsigned long *modtime)
>> return 0;
>> }
>>
>> +int os_poll(void *fds, unsigned int nfds, int timeout)
>> +{
>> + int n;
>> +
>> + CATCH_EINTR(n = poll((struct pollfd *) fds, nfds, timeout));
>> + if (n < 0)
>> + return -errno;
>> +
>> + return n;
>> +}
>> +
>> int os_set_exec_close(int fd)
>> {
>> int err;
>   		 	   		
> ------------------------------------------------------------------------------
> Site24x7 APM Insight: Get Deep Visibility into Application Performance
> APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
> Monitor end-to-end web transactions and take corrective actions now
> Troubleshoot faster and improve end-user experience. Signup Now!
> http://pubads.g.doubleclick.net/gampad/clk?id=267308311&iu=/4140
> _______________________________________________
> User-mode-linux-devel mailing list
> User-mode-linux-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel
>


------------------------------------------------------------------------------
Site24x7 APM Insight: Get Deep Visibility into Application Performance
APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
Monitor end-to-end web transactions and take corrective actions now
Troubleshoot faster and improve end-user experience. Signup Now!
http://pubads.g.doubleclick.net/gampad/clk?id=267308311&iu=/4140
_______________________________________________
User-mode-linux-devel mailing list
User-mode-linux-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2016-01-12 21:39 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-21 18:54 [uml-devel] [PATCH 1/2] Update UBD to use pread/pwrite family of functions Anton Ivanov
2015-12-21 18:54 ` [uml-devel] [PATCH 2/2] Bulk IO Transaction support part 1 Anton Ivanov
2015-12-21 19:04   ` Anton Ivanov
2016-01-10 16:00     ` Richard Weinberger
2016-01-10 16:36       ` Anton Ivanov
2016-01-10 21:51         ` Richard Weinberger
2016-01-12 21:25   ` James McMechan
2016-01-12 21:39     ` Anton Ivanov
2016-01-10 15:57 ` [uml-devel] [PATCH 1/2] Update UBD to use pread/pwrite family of functions Richard Weinberger

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.