All of lore.kernel.org
 help / color / mirror / Atom feed
* [uml-devel] [PATCH v2] EPOLL Interrupt Controller V2.0
@ 2015-11-09 14:33 Anton Ivanov
  2015-11-09 15:03 ` Anton Ivanov
  0 siblings, 1 reply; 14+ messages in thread
From: Anton Ivanov @ 2015-11-09 14:33 UTC (permalink / raw)
  To: user-mode-linux-devel; +Cc: Anton Ivanov

Epoll based interrupt controller.

IMPROVES: IO loop performance - no per-fd lookups, allowing for
a 15% IO speedup in a minimal config, rising to 100s of % with many
devices - an N^N lookup is now replaced by an O(log N) one

ADDS: True Write IRQ functionality

OBSOLETES: The need to call reactivate_fd() in any driver which
has only read IRQ semantics. Write IRQs work, but will need to
be updated to use this fully.

Potentially (with an API change) this will allow both edge and level
IRQ semantics.

This is a prerequisite for using packet mmap and multi-packet
read/write, which do not work well with poll().

Signed-off-by: Anton Ivanov <aivanov@brocade.com>
---
 arch/um/drivers/line.c            |   5 +-
 arch/um/drivers/mconsole_kern.c   |   2 -
 arch/um/drivers/net_kern.c        |   1 -
 arch/um/drivers/port_kern.c       |   1 -
 arch/um/drivers/random.c          |   1 -
 arch/um/drivers/ubd_kern.c        |   1 -
 arch/um/include/shared/irq_user.h |  24 ++-
 arch/um/include/shared/os.h       |  14 +-
 arch/um/kernel/irq.c              | 412 ++++++++++++++++++++++----------------
 arch/um/os-Linux/irq.c            | 150 ++++++--------
 10 files changed, 329 insertions(+), 282 deletions(-)

diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 6208702..84384c8 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
@@ -283,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
 	if (err)
 		return err;
 	if (output)
-		err = um_request_irq(driver->write_irq, fd, IRQ_WRITE,
+		err = um_request_irq(driver->write_irq, fd, IRQ_NONE,
 				     line_write_interrupt, IRQF_SHARED,
 				     driver->write_irq_name, data);
 	return err;
@@ -666,8 +667,6 @@ static irqreturn_t winch_interrupt(int irq, void *data)
 		tty_kref_put(tty);
 	}
  out:
-	if (winch->fd != -1)
-		reactivate_fd(winch->fd, WINCH_IRQ);
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 29880c9..5e8881c 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -95,7 +95,6 @@ static irqreturn_t mconsole_interrupt(int irq, void *dev_id)
 	}
 	if (!list_empty(&mc_requests))
 		schedule_work(&mconsole_work);
-	reactivate_fd(fd, MCONSOLE_IRQ);
 	return IRQ_HANDLED;
 }
 
@@ -243,7 +242,6 @@ void mconsole_stop(struct mc_request *req)
 		(*req->cmd->handler)(req);
 	}
 	os_set_fd_block(req->originating_fd, 0);
-	reactivate_fd(req->originating_fd, MCONSOLE_IRQ);
 	mconsole_reply(req, "", 0, 0);
 }
 
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index f70dd54..82ea3a2 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -137,7 +137,6 @@ static irqreturn_t uml_net_interrupt(int irq, void *dev_id)
 		schedule_work(&lp->work);
 		goto out;
 	}
-	reactivate_fd(lp->fd, UM_ETH_IRQ);
 
 out:
 	spin_unlock(&lp->lock);
diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
index 40ca5cc..b0e9ff3 100644
--- a/arch/um/drivers/port_kern.c
+++ b/arch/um/drivers/port_kern.c
@@ -137,7 +137,6 @@ static void port_work_proc(struct work_struct *unused)
 		if (!port->has_connection)
 			continue;
 
-		reactivate_fd(port->fd, ACCEPT_IRQ);
 		while (port_accept(port))
 			;
 		port->has_connection = 0;
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index dd16c90..a392828 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -72,7 +72,6 @@ static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size,
 				return ret ? : -EAGAIN;
 
 			atomic_inc(&host_sleep_count);
-			reactivate_fd(random_fd, RANDOM_IRQ);
 			add_sigio_fd(random_fd);
 
 			add_wait_queue(&host_read_wait, &wait);
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index e8ab93c..731982c 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -466,7 +466,6 @@ static void ubd_handler(void)
 		blk_end_request(req->req, 0, req->length);
 		kfree(req);
 	}
-	reactivate_fd(thread_fd, UBD_IRQ);
 
 	list_for_each_safe(list, next_ele, &restart){
 		ubd = container_of(list, struct ubd, restart);
diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h
index df56330..0eca64c 100644
--- a/arch/um/include/shared/irq_user.h
+++ b/arch/um/include/shared/irq_user.h
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
@@ -9,16 +10,23 @@
 #include <sysdep/ptrace.h>
 
 struct irq_fd {
-	struct irq_fd *next;
-	void *id;
-	int fd;
-	int type;
-	int irq;
-	int events;
-	int current_events;
+        void *id;
+        int irq;
+        int events;
+};
+
+
+#define IRQ_READ  0
+#define IRQ_WRITE 1 
+#define IRQ_NONE 2
+#define MAX_IRQ_TYPE (IRQ_NONE + 1)
+
+struct irq_entry {
+        struct irq_entry *next;
+        int fd;
+	struct irq_fd * irq_array[MAX_IRQ_TYPE + 1];
 };
 
-enum { IRQ_READ, IRQ_WRITE };
 
 struct siginfo;
 extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 21d704b..c449839 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
+ * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
@@ -284,15 +285,18 @@ extern void halt_skas(void);
 extern void reboot_skas(void);
 
 /* irq.c */
-extern int os_waiting_for_events(struct irq_fd *active_fds);
-extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds);
+
+extern int os_setup_epoll(int maxevents);
+extern int os_waiting_for_events_epoll(void *kernel_events, int maxevents);
+extern int os_add_epoll_fd (int events, int fd, void * data);
+extern int os_mod_epoll_fd (int events, int fd, void * data);
+extern int os_del_epoll_fd (int fd);
+
 extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
 		struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2);
 extern void os_free_irq_later(struct irq_fd *active_fds,
 		int irq, void *dev_id);
-extern int os_get_pollfd(int i);
-extern void os_set_pollfd(int i, int fd);
-extern void os_set_ioignore(void);
+extern void os_close_epoll(void);
 
 /* sigio.c */
 extern int add_sigio_fd(int fd);
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 23cb935..ff3069b 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -1,4 +1,7 @@
 /*
+ * Copyright (C) 2015 Brocade Communications Ltd
+ *	Author: Anton Ivanov aivanov@{brocade.com,kot-begemot.co.uk}
+ * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
@@ -18,6 +21,61 @@
 #include <os.h>
 
 /*
+*	We are on the "kernel side", so we cannot pick up sys/epoll.h.
+*	Instead, we lift the applicable key definitions out of it.
+*/
+
+
+enum EPOLL_EVENTS
+  {
+	EPOLLIN = 0x001,
+#define EPOLLIN EPOLLIN
+	EPOLLPRI = 0x002,
+#define EPOLLPRI EPOLLPRI
+	EPOLLOUT = 0x004,
+#define EPOLLOUT EPOLLOUT
+	EPOLLRDNORM = 0x040,
+#define EPOLLRDNORM EPOLLRDNORM
+	EPOLLRDBAND = 0x080,
+#define EPOLLRDBAND EPOLLRDBAND
+	EPOLLWRNORM = 0x100,
+#define EPOLLWRNORM EPOLLWRNORM
+	EPOLLWRBAND = 0x200,
+#define EPOLLWRBAND EPOLLWRBAND
+	EPOLLMSG = 0x400,
+#define EPOLLMSG EPOLLMSG
+	EPOLLERR = 0x008,
+#define EPOLLERR EPOLLERR
+	EPOLLHUP = 0x010,
+#define EPOLLHUP EPOLLHUP
+	EPOLLRDHUP = 0x2000,
+#define EPOLLRDHUP EPOLLRDHUP
+	EPOLLONESHOT = (1 << 30),
+#define EPOLLONESHOT EPOLLONESHOT
+	EPOLLET = (1 << 31)
+#define EPOLLET EPOLLET
+  };
+
+
+typedef union epoll_data
+{
+	void *ptr;
+	int fd;
+	uint32_t u32;
+	uint64_t u64;
+} epoll_data_t;
+
+struct epoll_event
+{
+	uint32_t events;	/* Epoll events */
+	epoll_data_t data;	/* User data variable */
+} __attribute__ ((__packed__));
+
+#define MAX_EPOLL_EVENTS 16
+
+static struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
+
+/*
  * This list is accessed under irq_lock, except in sigio_handler,
  * where it is safe from being modified.  IRQ handlers won't change it -
  * if an IRQ source has vanished, it will be freed by free_irqs just
@@ -25,44 +83,92 @@
  * list of irqs to free, with its own locking, coming back here to
  * remove list elements, taking the irq_lock to do so.
  */
-static struct irq_fd *active_fds = NULL;
-static struct irq_fd **last_irq_ptr = &active_fds;
+static struct irq_entry *active_fds = NULL;
 
 extern void free_irqs(void);
 
+
+static DEFINE_SPINLOCK(irq_lock);
+
+
+/*
+ * Principles of Operation:
+ * Each Epoll structure contains a pointer pointing back to an array
+ * with irq entries for read, write and none and their matching event
+ * masks.
+ * This allows us to stop looking up "who talked"
+ * We no longer need to enable/disable any polls while we process them
+ * epoll will take care of that. The exceptions to this (for now) are
+ * character devices, because of their own internal buffering, which
+ * needs to be updated to leverage the new write IRQ semantics.
+ * We can now support both read and write IRQs and have separate IRQs
+ * for read and write ops.
+ */
+
+
 void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
 	struct irq_fd *irq_fd;
-	int n;
+	struct irq_entry *irq_entry;
+	unsigned long flags;
+
+	int n, i, j;
 
 	while (1) {
-		n = os_waiting_for_events(active_fds);
-		if (n <= 0) {
-			if (n == -EINTR)
-				continue;
-			else break;
-		}
 
-		for (irq_fd = active_fds; irq_fd != NULL;
-		     irq_fd = irq_fd->next) {
-			if (irq_fd->current_events != 0) {
-				irq_fd->current_events = 0;
-				do_IRQ(irq_fd->irq, regs);
-			}
+		spin_lock_irqsave(&irq_lock, flags);
+
+		n = os_waiting_for_events_epoll(
+			&epoll_events, MAX_EPOLL_EVENTS
+		);
+
+
+ 		if (n <= 0) {
+			if (n == -EINTR) { continue; }
+			else { break; }
 		}
+
+
+		for (i = 0; i < n ; i++) {
+			/* start from the data ptr, walk the tree branch */
+			irq_entry = (struct irq_entry *) epoll_events[i].data.ptr;
+			for (j = 0; j < MAX_IRQ_TYPE ; j ++ ) {
+				irq_fd = irq_entry->irq_array[j];
+				if (irq_fd != NULL) {
+					if (epoll_events[i].events & irq_fd->events) {
+						do_IRQ(irq_fd->irq, regs);
+					}
+				}
+ 			}
+ 		}
+		spin_unlock_irqrestore(&irq_lock, flags);
 	}
 
 	free_irqs();
 }
 
-static DEFINE_SPINLOCK(irq_lock);
+static int update_events(struct irq_entry * irq_entry)
+{
+	int i;
+	int events = 0;
+	struct irq_fd * irq_fd;
+	for (i = 0; i < MAX_IRQ_TYPE ; i ++ ) {
+		irq_fd = irq_entry->irq_array[i];
+		if (irq_fd != NULL) {
+			events = irq_fd->events | events;
+		}
+	}
+	/* os_add_epoll will call os_mod_epoll if this already exists */
+	return os_add_epoll_fd(events, irq_entry->fd, irq_entry);
+}
+
 
 static int activate_fd(int irq, int fd, int type, void *dev_id)
 {
-	struct pollfd *tmp_pfd;
-	struct irq_fd *new_fd, *irq_fd;
+	struct irq_fd *new_fd;
+	struct irq_entry * irq_entry;
 	unsigned long flags;
-	int events, err, n;
+	int  i, err, events;
 
 	err = os_set_fd_async(fd);
 	if (err < 0)
@@ -74,186 +180,152 @@ static int activate_fd(int irq, int fd, int type, void *dev_id)
 		goto out;
 
 	if (type == IRQ_READ)
-		events = UM_POLLIN | UM_POLLPRI;
-	else events = UM_POLLOUT;
-	*new_fd = ((struct irq_fd) { .next  		= NULL,
-				     .id 		= dev_id,
-				     .fd 		= fd,
-				     .type 		= type,
-				     .irq 		= irq,
-				     .events 		= events,
-				     .current_events 	= 0 } );
-
-	err = -EBUSY;
-	spin_lock_irqsave(&irq_lock, flags);
-	for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
-		if ((irq_fd->fd == fd) && (irq_fd->type == type)) {
-			printk(KERN_ERR "Registering fd %d twice\n", fd);
-			printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq);
-			printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id,
-			       dev_id);
-			goto out_unlock;
-		}
-	}
-
+		events |= EPOLLIN | EPOLLPRI;
 	if (type == IRQ_WRITE)
-		fd = -1;
+		events |= EPOLLOUT;
 
-	tmp_pfd = NULL;
-	n = 0;
+	*new_fd = ((struct irq_fd) {
+		.id 		= dev_id,
+		.irq 		= irq,
+		.events 	= events
+	});
 
-	while (1) {
-		n = os_create_pollfd(fd, events, tmp_pfd, n);
-		if (n == 0)
-			break;
+	err = -EBUSY;
 
-		/*
-		 * n > 0
-		 * It means we couldn't put new pollfd to current pollfds
-		 * and tmp_fds is NULL or too small for new pollfds array.
-		 * Needed size is equal to n as minimum.
-		 *
-		 * Here we have to drop the lock in order to call
-		 * kmalloc, which might sleep.
-		 * If something else came in and changed the pollfds array
-		 * so we will not be able to put new pollfd struct to pollfds
-		 * then we free the buffer tmp_fds and try again.
-		 */
-		spin_unlock_irqrestore(&irq_lock, flags);
-		kfree(tmp_pfd);
+	spin_lock_irqsave(&irq_lock, flags);
 
-		tmp_pfd = kmalloc(n, GFP_KERNEL);
-		if (tmp_pfd == NULL)
-			goto out_kfree;
+	for (irq_entry = active_fds; irq_entry != NULL; irq_entry = irq_entry->next) {
+		if (irq_entry->fd == fd) break;
+	}
 
-		spin_lock_irqsave(&irq_lock, flags);
+	if (irq_entry == NULL) {
+		irq_entry = kmalloc(sizeof(struct irq_entry), GFP_KERNEL);
+		if (irq_entry == NULL) {
+			printk(KERN_ERR
+				"Failed to allocate new IRQ entry\n");
+			kfree(new_fd);
+			goto out;
+		}
+		irq_entry->fd = fd;
+		for (i = 0; i < MAX_IRQ_TYPE; i++) {
+			irq_entry->irq_array[i] = NULL;
+		}
+		irq_entry->next = active_fds;
+		active_fds = irq_entry;
 	}
 
-	*last_irq_ptr = new_fd;
-	last_irq_ptr = &new_fd->next;
+	if (irq_entry->irq_array[type] != NULL) {
+		printk(KERN_ERR
+			"Trying to reregister IRQ %d FD %d TYPE %d ID %p\n",
+			irq, fd, type, dev_id
+		);
+		goto out_unlock;
+	} else {
+		irq_entry->irq_array[type] = new_fd;
+	}
 
+	update_events(irq_entry);
+	
 	spin_unlock_irqrestore(&irq_lock, flags);
 
-	/*
-	 * This calls activate_fd, so it has to be outside the critical
-	 * section.
-	 */
-	maybe_sigio_broken(fd, (type == IRQ_READ));
+	maybe_sigio_broken(fd, (type != IRQ_NONE));
 
 	return 0;
 
  out_unlock:
 	spin_unlock_irqrestore(&irq_lock, flags);
- out_kfree:
 	kfree(new_fd);
  out:
 	return err;
 }
 
-static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
-{
-	unsigned long flags;
 
-	spin_lock_irqsave(&irq_lock, flags);
-	os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr);
-	spin_unlock_irqrestore(&irq_lock, flags);
-}
-
-struct irq_and_dev {
-	int irq;
-	void *dev;
-};
-
-static int same_irq_and_dev(struct irq_fd *irq, void *d)
+static void do_free_by_irq_and_dev(
+	struct irq_entry* irq_entry,
+	unsigned int irq,
+	void * dev
+)
 {
-	struct irq_and_dev *data = d;
-
-	return ((irq->irq == data->irq) && (irq->id == data->dev));
+	int i;
+	struct irq_fd * to_free;
+	for (i = 0; i < MAX_IRQ_TYPE ; i ++ ) {
+		if (irq_entry->irq_array[i] != NULL) {
+			if (
+				(irq_entry->irq_array[i]->irq == irq) &&
+				(irq_entry->irq_array[i]->id == dev)
+			) {
+				to_free = irq_entry->irq_array[i];
+				irq_entry->irq_array[i] = NULL;
+				update_events(irq_entry);
+				kfree(to_free);
+			}
+		}
+	}
 }
 
-static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
+void free_irq_by_fd(int fd)
 {
-	struct irq_and_dev data = ((struct irq_and_dev) { .irq  = irq,
-							  .dev  = dev });
 
-	free_irq_by_cb(same_irq_and_dev, &data);
-}
+	struct irq_entry *irq_entry, *prev = NULL;
+	unsigned long flags;
+	int i;
 
-static int same_fd(struct irq_fd *irq, void *fd)
-{
-	return (irq->fd == *((int *)fd));
+	spin_lock_irqsave(&irq_lock, flags);
+	for (irq_entry = active_fds; irq_entry != NULL; irq_entry = irq_entry->next) {
+		if (irq_entry->fd == irq_entry->fd) {
+			os_del_epoll_fd(fd);   /* ignore err, just do it */
+			for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+				if (irq_entry->irq_array[i] != NULL) {
+					kfree(irq_entry->irq_array[i]);
+				}
+			}
+			if (prev == NULL) {
+				active_fds = irq_entry->next;
+			} else {
+				prev->next = irq_entry->next;
+			}
+			kfree(irq_entry);
+		} else {
+			prev = irq_entry;
+		}
+	}
+	spin_unlock_irqrestore(&irq_lock, flags);
+	
 }
 
-void free_irq_by_fd(int fd)
-{
-	free_irq_by_cb(same_fd, &fd);
-}
 
-/* Must be called with irq_lock held */
-static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
+static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
 {
-	struct irq_fd *irq;
-	int i = 0;
-	int fdi;
-
-	for (irq = active_fds; irq != NULL; irq = irq->next) {
-		if ((irq->fd == fd) && (irq->irq == irqnum))
-			break;
-		i++;
-	}
-	if (irq == NULL) {
-		printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n",
-		       fd);
-		goto out;
-	}
-	fdi = os_get_pollfd(i);
-	if ((fdi != -1) && (fdi != fd)) {
-		printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds "
-		       "and pollfds, fd %d vs %d, need %d\n", irq->fd,
-		       fdi, fd);
-		irq = NULL;
-		goto out;
-	}
-	*index_out = i;
- out:
-	return irq;
-}
 
-void reactivate_fd(int fd, int irqnum)
-{
-	struct irq_fd *irq;
+	struct irq_entry *irq_entry;
 	unsigned long flags;
-	int i;
 
 	spin_lock_irqsave(&irq_lock, flags);
-	irq = find_irq_by_fd(fd, irqnum, &i);
-	if (irq == NULL) {
-		spin_unlock_irqrestore(&irq_lock, flags);
-		return;
+	for (irq_entry = active_fds; irq_entry != NULL; irq_entry = irq_entry->next) {
+		do_free_by_irq_and_dev(irq_entry, irq, dev);
 	}
-	os_set_pollfd(i, irq->fd);
 	spin_unlock_irqrestore(&irq_lock, flags);
-
-	add_sigio_fd(fd);
+	
 }
 
-void deactivate_fd(int fd, int irqnum)
+
+void reactivate_fd(int fd, int irqnum)
 {
-	struct irq_fd *irq;
+	struct irq_entry *irq_entry;
 	unsigned long flags;
-	int i;
-
 	spin_lock_irqsave(&irq_lock, flags);
-	irq = find_irq_by_fd(fd, irqnum, &i);
-	if (irq == NULL) {
-		spin_unlock_irqrestore(&irq_lock, flags);
-		return;
+	for (irq_entry = active_fds; irq_entry != NULL; irq_entry = irq_entry->next) {
+		if (irq_entry->fd == fd) {
+			update_events(irq_entry);
+		}
 	}
-
-	os_set_pollfd(i, -1);
 	spin_unlock_irqrestore(&irq_lock, flags);
+	
+}
 
-	ignore_sigio_fd(fd);
+void deactivate_fd(int fd, int irqnum)
+{
+	os_del_epoll_fd(fd);   /* ignore err, just do it */
 }
 EXPORT_SYMBOL(deactivate_fd);
 
@@ -265,17 +337,17 @@ EXPORT_SYMBOL(deactivate_fd);
  */
 int deactivate_all_fds(void)
 {
-	struct irq_fd *irq;
+	struct irq_entry * irq_entry;
 	int err;
 
-	for (irq = active_fds; irq != NULL; irq = irq->next) {
-		err = os_clear_fd_async(irq->fd);
-		if (err)
-			return err;
+	for (irq_entry = active_fds; irq_entry != NULL; irq_entry = irq_entry->next) {
+		os_del_epoll_fd(irq_entry->fd);   /* ignore err, just do it */
+		err = os_clear_fd_async(irq_entry->fd);
+		if (err) {
+			printk(KERN_ERR "Clear FD async failed with %d", err);
+		}
 	}
-	/* If there is a signal already queued, after unblocking ignore it */
-	os_set_ioignore();
-
+	os_close_epoll();
 	return 0;
 }
 
@@ -308,13 +380,13 @@ int um_request_irq(unsigned int irq, int fd, int type,
 {
 	int err;
 
-	if (fd != -1) {
+	err = request_irq(irq, handler, irqflags, devname, dev_id);
+
+	if ((!err) && (fd != -1)) {
 		err = activate_fd(irq, fd, type, dev_id);
-		if (err)
-			return err;
 	}
 
-	return request_irq(irq, handler, irqflags, devname, dev_id);
+	return err;
 }
 
 EXPORT_SYMBOL(um_request_irq);
@@ -352,9 +424,9 @@ void __init init_IRQ(void)
 	int i;
 
 	irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
-
-	for (i = 1; i < NR_IRQS; i++)
+	for (i = 1; i < NR_IRQS - 1 ; i++)
 		irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
+	os_setup_epoll(MAX_EPOLL_EVENTS);
 }
 
 /*
@@ -382,11 +454,11 @@ void __init init_IRQ(void)
  * thread_info.
  *
  * There are three cases -
- *     The first interrupt on the stack - sets up the thread_info and
+ *	 The first interrupt on the stack - sets up the thread_info and
  * handles the interrupt
- *     A nested interrupt interrupting the copying of the thread_info -
+ *	 A nested interrupt interrupting the copying of the thread_info -
  * can't handle the interrupt, as the stack is in an unknown state
- *     A nested interrupt not interrupting the copying of the
+ *	 A nested interrupt not interrupting the copying of the
  * thread_info - doesn't do any setup, just handles the interrupt
  *
  * The first job is to figure out whether we interrupted stack setup.
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index b9afb74..81b135a 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
@@ -6,6 +7,7 @@
 #include <stdlib.h>
 #include <errno.h>
 #include <poll.h>
+#include <sys/epoll.h>
 #include <signal.h>
 #include <string.h>
 #include <irq_user.h>
@@ -16,120 +18,88 @@
  * Locked by irq_lock in arch/um/kernel/irq.c.  Changed by os_create_pollfd
  * and os_free_irq_by_cb, which are called under irq_lock.
  */
-static struct pollfd *pollfds = NULL;
-static int pollfds_num = 0;
-static int pollfds_size = 0;
 
-int os_waiting_for_events(struct irq_fd *active_fds)
+/* epoll support */
+
+
+static int epollfd = -1;
+
+int os_setup_epoll(int maxevents) {
+	epollfd = epoll_create(maxevents);
+	return epollfd;
+}
+
+int os_waiting_for_events_epoll(void *kernel_events, int maxevents)
 {
-	struct irq_fd *irq_fd;
-	int i, n, err;
+	int n, err;
 
-	n = poll(pollfds, pollfds_num, 0);
+	n = epoll_wait(epollfd,
+		(struct epoll_event *) kernel_events, maxevents, 0);
 	if (n < 0) {
 		err = -errno;
 		if (errno != EINTR)
-			printk(UM_KERN_ERR "os_waiting_for_events:"
-			       " poll returned %d, errno = %d\n", n, errno);
+			printk(
+				UM_KERN_ERR "os_waiting_for_events:"
+				" poll returned %d, error = %s\n", n,
+				strerror(errno)
+			);
 		return err;
 	}
 
-	if (n == 0)
-		return 0;
+	return n;
+}
 
-	irq_fd = active_fds;
+int os_add_epoll_fd (int events, int fd, void * data) {
+	struct epoll_event event;
+	int result;
 
-	for (i = 0; i < pollfds_num; i++) {
-		if (pollfds[i].revents != 0) {
-			irq_fd->current_events = pollfds[i].revents;
-			pollfds[i].fd = -1;
-		}
-		irq_fd = irq_fd->next;
+	event.data.ptr = data;
+	event.events = events | EPOLLET;
+	result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
+	if ((result) && (errno == EEXIST)) {
+		result = os_mod_epoll_fd (events, fd, data);
 	}
-	return n;
+	if (result) {
+		printk("epollctl add err fd %d, %s\n", fd, strerror(errno));
+	}
+	return result;
 }
 
-int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds)
-{
-	if (pollfds_num == pollfds_size) {
-		if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) {
-			/* return min size needed for new pollfds area */
-			return (pollfds_size + 1) * sizeof(pollfds[0]);
-		}
-
-		if (pollfds != NULL) {
-			memcpy(tmp_pfd, pollfds,
-			       sizeof(pollfds[0]) * pollfds_size);
-			/* remove old pollfds */
-			kfree(pollfds);
-		}
-		pollfds = tmp_pfd;
-		pollfds_size++;
-	} else
-		kfree(tmp_pfd);	/* remove not used tmp_pfd */
-
-	pollfds[pollfds_num] = ((struct pollfd) { .fd		= fd,
-						  .events	= events,
-						  .revents	= 0 });
-	pollfds_num++;
-
-	return 0;
+int os_mod_epoll_fd (int events, int fd, void * data) {
+	struct epoll_event event;
+	int result;
+	event.data.ptr = data;
+	event.events = events;
+	result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event);
+	if (result) {
+		printk("epollctl mod err fd %d, %s\n", fd, strerror(errno));
+	}
+	return result;
 }
 
-void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
-		struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2)
-{
-	struct irq_fd **prev;
-	int i = 0;
-
-	prev = &active_fds;
-	while (*prev != NULL) {
-		if ((*test)(*prev, arg)) {
-			struct irq_fd *old_fd = *prev;
-			if ((pollfds[i].fd != -1) &&
-			    (pollfds[i].fd != (*prev)->fd)) {
-				printk(UM_KERN_ERR "os_free_irq_by_cb - "
-				       "mismatch between active_fds and "
-				       "pollfds, fd %d vs %d\n",
-				       (*prev)->fd, pollfds[i].fd);
-				goto out;
-			}
-
-			pollfds_num--;
-
-			/*
-			 * This moves the *whole* array after pollfds[i]
-			 * (though it doesn't spot as such)!
-			 */
-			memmove(&pollfds[i], &pollfds[i + 1],
-			       (pollfds_num - i) * sizeof(pollfds[0]));
-			if (*last_irq_ptr2 == &old_fd->next)
-				*last_irq_ptr2 = prev;
-
-			*prev = (*prev)->next;
-			if (old_fd->type == IRQ_WRITE)
-				ignore_sigio_fd(old_fd->fd);
-			kfree(old_fd);
-			continue;
-		}
-		prev = &(*prev)->next;
-		i++;
+int os_del_epoll_fd (int fd) {
+	struct epoll_event event;
+	int result;
+	result = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event);
+	if (result) {
+		printk("epollctl del err %s\n", strerror(errno));
 	}
- out:
-	return;
+	return result;
 }
 
-int os_get_pollfd(int i)
+void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
+		struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2)
 {
-	return pollfds[i].fd;
+	printk("Someone invoking obsolete deactivate_by_CB!!!\n");
+	return;
 }
 
-void os_set_pollfd(int i, int fd)
+void os_set_ioignore(void)
 {
-	pollfds[i].fd = fd;
+	signal(SIGIO, SIG_IGN);
 }
 
-void os_set_ioignore(void)
+void os_close_epoll(void)
 {
-	signal(SIGIO, SIG_IGN);
+	os_close_file(epollfd);
 }
-- 
2.1.4


------------------------------------------------------------------------------
Presto, an open source distributed SQL query engine for big data, initially
developed by Facebook, enables you to easily query your data on Hadoop in a 
more interactive manner. Teradata is also now providing full enterprise
support for Presto. Download a free open source copy now.
http://pubads.g.doubleclick.net/gampad/clk?id=250295911&iu=/4140
_______________________________________________
User-mode-linux-devel mailing list
User-mode-linux-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel


^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2015-11-18  8:33 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-09 14:33 [uml-devel] [PATCH v2] EPOLL Interrupt Controller V2.0 Anton Ivanov
2015-11-09 15:03 ` Anton Ivanov
2015-11-10 18:42   ` Anton Ivanov
2015-11-10 20:24     ` Richard Weinberger
2015-11-11 20:46   ` Thomas Meyer
2015-11-11 21:05     ` Richard Weinberger
2015-11-11 21:39       ` Anton Ivanov
2015-11-11 21:49         ` stian
2015-11-11 23:25           ` Anton Ivanov
2015-11-12 12:29       ` Anton Ivanov
2015-11-12 15:23         ` Anton Ivanov
2015-11-12 16:03           ` Anton Ivanov
2015-11-16  8:09             ` Anton Ivanov
2015-11-18  8:33               ` Anton Ivanov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.