All of lore.kernel.org
 help / color / mirror / Atom feed
From: Anton Ivanov <aivanov@brocade.com>
To: user-mode-linux-devel@lists.sourceforge.net
Cc: Anton Ivanov <aivanov@brocade.com>
Subject: [uml-devel] [PATCH v2] EPOLL Interrupt Controller V2.0
Date: Mon,  9 Nov 2015 14:33:17 +0000	[thread overview]
Message-ID: <1447079597-17816-1-git-send-email-aivanov@brocade.com> (raw)

Epoll based interrupt controller.

IMPROVES: IO loop performance - no per fd lookups, allowing for
15% IO speedup in minimal config going to 100s of % with many
devices - a N^N lookup is now replaced by a log(N)

ADDS: True Write IRQ functionality

OBSOLETES: The need to call reactivate_fd() in any driver which
has only read IRQ semantics. Write IRQs work, but will need to
be updated to use this fully.

Potentially (with a change in API) will allow both edge and level
IRQ semantics.

Pre-requisite for using packet mmap and multipacket read/write
which do not get along with poll() very well.

Signed-off-by: Anton Ivanov <aivanov@brocade.com>
---
 arch/um/drivers/line.c            |   5 +-
 arch/um/drivers/mconsole_kern.c   |   2 -
 arch/um/drivers/net_kern.c        |   1 -
 arch/um/drivers/port_kern.c       |   1 -
 arch/um/drivers/random.c          |   1 -
 arch/um/drivers/ubd_kern.c        |   1 -
 arch/um/include/shared/irq_user.h |  24 ++-
 arch/um/include/shared/os.h       |  14 +-
 arch/um/kernel/irq.c              | 412 ++++++++++++++++++++++----------------
 arch/um/os-Linux/irq.c            | 150 ++++++--------
 10 files changed, 329 insertions(+), 282 deletions(-)

diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 6208702..84384c8 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
@@ -283,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
 	if (err)
 		return err;
 	if (output)
-		err = um_request_irq(driver->write_irq, fd, IRQ_WRITE,
+		err = um_request_irq(driver->write_irq, fd, IRQ_NONE,
 				     line_write_interrupt, IRQF_SHARED,
 				     driver->write_irq_name, data);
 	return err;
@@ -666,8 +667,6 @@ static irqreturn_t winch_interrupt(int irq, void *data)
 		tty_kref_put(tty);
 	}
  out:
-	if (winch->fd != -1)
-		reactivate_fd(winch->fd, WINCH_IRQ);
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 29880c9..5e8881c 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -95,7 +95,6 @@ static irqreturn_t mconsole_interrupt(int irq, void *dev_id)
 	}
 	if (!list_empty(&mc_requests))
 		schedule_work(&mconsole_work);
-	reactivate_fd(fd, MCONSOLE_IRQ);
 	return IRQ_HANDLED;
 }
 
@@ -243,7 +242,6 @@ void mconsole_stop(struct mc_request *req)
 		(*req->cmd->handler)(req);
 	}
 	os_set_fd_block(req->originating_fd, 0);
-	reactivate_fd(req->originating_fd, MCONSOLE_IRQ);
 	mconsole_reply(req, "", 0, 0);
 }
 
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index f70dd54..82ea3a2 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -137,7 +137,6 @@ static irqreturn_t uml_net_interrupt(int irq, void *dev_id)
 		schedule_work(&lp->work);
 		goto out;
 	}
-	reactivate_fd(lp->fd, UM_ETH_IRQ);
 
 out:
 	spin_unlock(&lp->lock);
diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
index 40ca5cc..b0e9ff3 100644
--- a/arch/um/drivers/port_kern.c
+++ b/arch/um/drivers/port_kern.c
@@ -137,7 +137,6 @@ static void port_work_proc(struct work_struct *unused)
 		if (!port->has_connection)
 			continue;
 
-		reactivate_fd(port->fd, ACCEPT_IRQ);
 		while (port_accept(port))
 			;
 		port->has_connection = 0;
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index dd16c90..a392828 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -72,7 +72,6 @@ static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size,
 				return ret ? : -EAGAIN;
 
 			atomic_inc(&host_sleep_count);
-			reactivate_fd(random_fd, RANDOM_IRQ);
 			add_sigio_fd(random_fd);
 
 			add_wait_queue(&host_read_wait, &wait);
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index e8ab93c..731982c 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -466,7 +466,6 @@ static void ubd_handler(void)
 		blk_end_request(req->req, 0, req->length);
 		kfree(req);
 	}
-	reactivate_fd(thread_fd, UBD_IRQ);
 
 	list_for_each_safe(list, next_ele, &restart){
 		ubd = container_of(list, struct ubd, restart);
diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h
index df56330..0eca64c 100644
--- a/arch/um/include/shared/irq_user.h
+++ b/arch/um/include/shared/irq_user.h
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
@@ -9,16 +10,23 @@
 #include <sysdep/ptrace.h>
 
 struct irq_fd {
-	struct irq_fd *next;
-	void *id;
-	int fd;
-	int type;
-	int irq;
-	int events;
-	int current_events;
+	void *id;
+	int irq;
+	int events;
+};
+
+
+#define IRQ_READ  0
+#define IRQ_WRITE 1
+#define IRQ_NONE 2
+#define MAX_IRQ_TYPE (IRQ_NONE + 1)
+
+struct irq_entry {
+	struct irq_entry *next;
+	int fd;
+	struct irq_fd *irq_array[MAX_IRQ_TYPE + 1];
 };
 
-enum { IRQ_READ, IRQ_WRITE };
 
 struct siginfo;
 extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 21d704b..c449839 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
+ * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
@@ -284,15 +285,18 @@ extern void halt_skas(void);
 extern void reboot_skas(void);
 
 /* irq.c */
-extern int os_waiting_for_events(struct irq_fd *active_fds);
-extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds);
+
+extern int os_setup_epoll(int maxevents);
+extern int os_waiting_for_events_epoll(void *kernel_events, int maxevents);
+extern int os_add_epoll_fd (int events, int fd, void * data);
+extern int os_mod_epoll_fd (int events, int fd, void * data);
+extern int os_del_epoll_fd (int fd);
+
 extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
 		struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2);
 extern void os_free_irq_later(struct irq_fd *active_fds,
 		int irq, void *dev_id);
-extern int os_get_pollfd(int i);
-extern void os_set_pollfd(int i, int fd);
-extern void os_set_ioignore(void);
+extern void os_close_epoll(void);
 
 /* sigio.c */
 extern int add_sigio_fd(int fd);
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 23cb935..ff3069b 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -1,4 +1,7 @@
 /*
+ * Copyright (C) 2015 Brocade Communications Ltd
+ *	Author: Anton Ivanov aivanov@{brocade.com,kot-begemot.co.uk}
+ * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
@@ -18,6 +21,61 @@
 #include <os.h>
 
 /*
+*	We are on the "kernel side" so we cannot pick up the sys/epoll.h
+*	So we lift out of it the applicable key definitions.
+*/
+
+
+enum EPOLL_EVENTS
+  {
+	EPOLLIN = 0x001,
+#define EPOLLIN EPOLLIN
+	EPOLLPRI = 0x002,
+#define EPOLLPRI EPOLLPRI
+	EPOLLOUT = 0x004,
+#define EPOLLOUT EPOLLOUT
+	EPOLLRDNORM = 0x040,
+#define EPOLLRDNORM EPOLLRDNORM
+	EPOLLRDBAND = 0x080,
+#define EPOLLRDBAND EPOLLRDBAND
+	EPOLLWRNORM = 0x100,
+#define EPOLLWRNORM EPOLLWRNORM
+	EPOLLWRBAND = 0x200,
+#define EPOLLWRBAND EPOLLWRBAND
+	EPOLLMSG = 0x400,
+#define EPOLLMSG EPOLLMSG
+	EPOLLERR = 0x008,
+#define EPOLLERR EPOLLERR
+	EPOLLHUP = 0x010,
+#define EPOLLHUP EPOLLHUP
+	EPOLLRDHUP = 0x2000,
+#define EPOLLRDHUP EPOLLRDHUP
+	EPOLLONESHOT = (1 << 30),
+#define EPOLLONESHOT EPOLLONESHOT
+	EPOLLET = (1U << 31)
+#define EPOLLET EPOLLET
+  };
+
+
+typedef union epoll_data
+{
+	void *ptr;
+	int fd;
+	uint32_t u32;
+	uint64_t u64;
+} epoll_data_t;
+
+struct epoll_event
+{
+	uint32_t events;	/* Epoll events */
+	epoll_data_t data;	/* User data variable */
+} __attribute__ ((__packed__));
+
+#define MAX_EPOLL_EVENTS 16
+
+static struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
+
+/*
  * This list is accessed under irq_lock, except in sigio_handler,
  * where it is safe from being modified.  IRQ handlers won't change it -
  * if an IRQ source has vanished, it will be freed by free_irqs just
@@ -25,44 +83,92 @@
  * list of irqs to free, with its own locking, coming back here to
  * remove list elements, taking the irq_lock to do so.
  */
-static struct irq_fd *active_fds = NULL;
-static struct irq_fd **last_irq_ptr = &active_fds;
+static struct irq_entry *active_fds = NULL;
 
 extern void free_irqs(void);
 
+
+static DEFINE_SPINLOCK(irq_lock);
+
+
+/*
+ * Principles of Operation:
+ * Each Epoll structure contains a pointer pointing back to an array
+ * with irq entries for read, write and none and their matching event
+ * masks.
+ * This allows us to stop looking up "who talked"
+ * We no longer need to enable/disable any polls while we process them
+ * epoll will take care of that. The exemption to this (for now) are
+ * character devices because of their own internal buffering, which
+ * needs to be updated to leverage the new write IRQ semantics.
+ * We can now support both read and write IRQs and have separate IRQs
+ * for read and write ops.
+ */
+
+
 void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
 	struct irq_fd *irq_fd;
-	int n;
+	struct irq_entry *irq_entry;
+	unsigned long flags;
+
+	int n, i, j;
 
 	while (1) {
-		n = os_waiting_for_events(active_fds);
-		if (n <= 0) {
-			if (n == -EINTR)
-				continue;
-			else break;
-		}
 
-		for (irq_fd = active_fds; irq_fd != NULL;
-		     irq_fd = irq_fd->next) {
-			if (irq_fd->current_events != 0) {
-				irq_fd->current_events = 0;
-				do_IRQ(irq_fd->irq, regs);
-			}
+		spin_lock_irqsave(&irq_lock, flags);
+
+		n = os_waiting_for_events_epoll(
+			&epoll_events, MAX_EPOLL_EVENTS
+		);
+
+
+		if (n <= 0) {
+			spin_unlock_irqrestore(&irq_lock, flags);
+			if (n == -EINTR) { continue; } else { break; }
+		}
+
+
+		for (i = 0; i < n ; i++) {
+			/* start from the data ptr, walk the tree branch */
+			irq_entry = (struct irq_entry *) epoll_events[i].data.ptr;
+			for (j = 0; j < MAX_IRQ_TYPE ; j ++ ) {
+				irq_fd = irq_entry->irq_array[j];
+				if (irq_fd != NULL) {
+					if (epoll_events[i].events & irq_fd->events) {
+						do_IRQ(irq_fd->irq, regs);
+					}
+				}
+			}
+		}
+		spin_unlock_irqrestore(&irq_lock, flags);
 	}
 
 	free_irqs();
 }
 
-static DEFINE_SPINLOCK(irq_lock);
+static int update_events(struct irq_entry * irq_entry)
+{
+	int i;
+	int events = 0;
+	struct irq_fd * irq_fd;
+	for (i = 0; i < MAX_IRQ_TYPE ; i ++ ) {
+		irq_fd = irq_entry->irq_array[i];
+		if (irq_fd != NULL) {
+			events = irq_fd->events | events;
+		}
+	}
+	/* os_add_epoll will call os_mod_epoll if this already exists */
+	return os_add_epoll_fd(events, irq_entry->fd, irq_entry);
+}
+
 
 static int activate_fd(int irq, int fd, int type, void *dev_id)
 {
-	struct pollfd *tmp_pfd;
-	struct irq_fd *new_fd, *irq_fd;
+	struct irq_fd *new_fd;
+	struct irq_entry * irq_entry;
 	unsigned long flags;
-	int events, err, n;
+	int  i, err, events = 0;
 
 	err = os_set_fd_async(fd);
 	if (err < 0)
@@ -74,186 +180,152 @@ static int activate_fd(int irq, int fd, int type, void *dev_id)
 		goto out;
 
 	if (type == IRQ_READ)
-		events = UM_POLLIN | UM_POLLPRI;
-	else events = UM_POLLOUT;
-	*new_fd = ((struct irq_fd) { .next  		= NULL,
-				     .id 		= dev_id,
-				     .fd 		= fd,
-				     .type 		= type,
-				     .irq 		= irq,
-				     .events 		= events,
-				     .current_events 	= 0 } );
-
-	err = -EBUSY;
-	spin_lock_irqsave(&irq_lock, flags);
-	for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
-		if ((irq_fd->fd == fd) && (irq_fd->type == type)) {
-			printk(KERN_ERR "Registering fd %d twice\n", fd);
-			printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq);
-			printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id,
-			       dev_id);
-			goto out_unlock;
-		}
-	}
-
+		events |= EPOLLIN | EPOLLPRI;
 	if (type == IRQ_WRITE)
-		fd = -1;
+		events |= EPOLLOUT;
 
-	tmp_pfd = NULL;
-	n = 0;
+	*new_fd = ((struct irq_fd) {
+		.id 		= dev_id,
+		.irq 		= irq,
+		.events 	= events
+	});
 
-	while (1) {
-		n = os_create_pollfd(fd, events, tmp_pfd, n);
-		if (n == 0)
-			break;
+	err = -EBUSY;
 
-		/*
-		 * n > 0
-		 * It means we couldn't put new pollfd to current pollfds
-		 * and tmp_fds is NULL or too small for new pollfds array.
-		 * Needed size is equal to n as minimum.
-		 *
-		 * Here we have to drop the lock in order to call
-		 * kmalloc, which might sleep.
-		 * If something else came in and changed the pollfds array
-		 * so we will not be able to put new pollfd struct to pollfds
-		 * then we free the buffer tmp_fds and try again.
-		 */
-		spin_unlock_irqrestore(&irq_lock, flags);
-		kfree(tmp_pfd);
+	spin_lock_irqsave(&irq_lock, flags);
 
-		tmp_pfd = kmalloc(n, GFP_KERNEL);
-		if (tmp_pfd == NULL)
-			goto out_kfree;
+	for (irq_entry = active_fds; irq_entry != NULL; irq_entry = irq_entry->next) {
+		if (irq_entry->fd == fd) break;
+	}
 
-		spin_lock_irqsave(&irq_lock, flags);
+	if (irq_entry == NULL) {
+		irq_entry = kmalloc(sizeof(struct irq_entry), GFP_KERNEL);
+		if (irq_entry == NULL) {
+			printk(KERN_ERR
+				"Failed to allocate new IRQ entry\n");
+			kfree(new_fd);
+			goto out;
+		}
+		irq_entry->fd = fd;
+		for (i = 0; i < MAX_IRQ_TYPE; i++) {
+			irq_entry->irq_array[i] = NULL;
+		}
+		irq_entry->next = active_fds;
+		active_fds = irq_entry;
 	}
 
-	*last_irq_ptr = new_fd;
-	last_irq_ptr = &new_fd->next;
+	if (irq_entry->irq_array[type] != NULL) {
+		printk(KERN_ERR
+			"Trying to reregister IRQ %d FD %d TYPE %d ID %p\n",
+			irq, fd, type, dev_id
+		);
+		goto out_unlock;
+	} else {
+		irq_entry->irq_array[type] = new_fd;
+	}
 
+	update_events(irq_entry);
+
 	spin_unlock_irqrestore(&irq_lock, flags);
 
-	/*
-	 * This calls activate_fd, so it has to be outside the critical
-	 * section.
-	 */
-	maybe_sigio_broken(fd, (type == IRQ_READ));
+	maybe_sigio_broken(fd, (type != IRQ_NONE));
 
 	return 0;
 
  out_unlock:
 	spin_unlock_irqrestore(&irq_lock, flags);
- out_kfree:
 	kfree(new_fd);
  out:
 	return err;
 }
 
-static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
-{
-	unsigned long flags;
 
-	spin_lock_irqsave(&irq_lock, flags);
-	os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr);
-	spin_unlock_irqrestore(&irq_lock, flags);
-}
-
-struct irq_and_dev {
-	int irq;
-	void *dev;
-};
-
-static int same_irq_and_dev(struct irq_fd *irq, void *d)
+static void do_free_by_irq_and_dev(
+	struct irq_entry* irq_entry,
+	unsigned int irq,
+	void * dev
+)
 {
-	struct irq_and_dev *data = d;
-
-	return ((irq->irq == data->irq) && (irq->id == data->dev));
+	int i;
+	struct irq_fd * to_free;
+	for (i = 0; i < MAX_IRQ_TYPE ; i ++ ) {
+		if (irq_entry->irq_array[i] != NULL) {
+			if (
+				(irq_entry->irq_array[i]->irq == irq) &&
+				(irq_entry->irq_array[i]->id == dev)
+			) {
+				to_free = irq_entry->irq_array[i];
+				irq_entry->irq_array[i] = NULL;
+				update_events(irq_entry);
+				kfree(to_free);
+			}
+		}
+	}
 }
 
-static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
+void free_irq_by_fd(int fd)
 {
-	struct irq_and_dev data = ((struct irq_and_dev) { .irq  = irq,
-							  .dev  = dev });
 
-	free_irq_by_cb(same_irq_and_dev, &data);
-}
+	struct irq_entry *irq_entry, *prev = NULL;
+	unsigned long flags;
+	int i;
 
-static int same_fd(struct irq_fd *irq, void *fd)
-{
-	return (irq->fd == *((int *)fd));
+	spin_lock_irqsave(&irq_lock, flags);
+	for (irq_entry = active_fds; irq_entry != NULL; irq_entry = irq_entry->next) {
+		if (irq_entry->fd == fd) {
+			os_del_epoll_fd(fd);   /* ignore err, just do it */
+			for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+				if (irq_entry->irq_array[i] != NULL) {
+					kfree(irq_entry->irq_array[i]);
+				}
+			}
+			if (prev == NULL) {
+				active_fds = irq_entry->next;
+			} else {
+				prev->next = irq_entry->next;
+			}
+			kfree(irq_entry); break;	/* entry freed - must not deref ->next */
+		} else {
+			prev = irq_entry;
+		}
+	}
+	spin_unlock_irqrestore(&irq_lock, flags);
+
 }
 
-void free_irq_by_fd(int fd)
-{
-	free_irq_by_cb(same_fd, &fd);
-}
 
-/* Must be called with irq_lock held */
-static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
+static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
 {
-	struct irq_fd *irq;
-	int i = 0;
-	int fdi;
-
-	for (irq = active_fds; irq != NULL; irq = irq->next) {
-		if ((irq->fd == fd) && (irq->irq == irqnum))
-			break;
-		i++;
-	}
-	if (irq == NULL) {
-		printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n",
-		       fd);
-		goto out;
-	}
-	fdi = os_get_pollfd(i);
-	if ((fdi != -1) && (fdi != fd)) {
-		printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds "
-		       "and pollfds, fd %d vs %d, need %d\n", irq->fd,
-		       fdi, fd);
-		irq = NULL;
-		goto out;
-	}
-	*index_out = i;
- out:
-	return irq;
-}
 
-void reactivate_fd(int fd, int irqnum)
-{
-	struct irq_fd *irq;
+	struct irq_entry *irq_entry;
 	unsigned long flags;
-	int i;
 
 	spin_lock_irqsave(&irq_lock, flags);
-	irq = find_irq_by_fd(fd, irqnum, &i);
-	if (irq == NULL) {
-		spin_unlock_irqrestore(&irq_lock, flags);
-		return;
+	for (irq_entry = active_fds; irq_entry != NULL; irq_entry = irq_entry->next) {
+		do_free_by_irq_and_dev(irq_entry, irq, dev);
 	}
-	os_set_pollfd(i, irq->fd);
 	spin_unlock_irqrestore(&irq_lock, flags);
-
-	add_sigio_fd(fd);
+
 }
 
-void deactivate_fd(int fd, int irqnum)
+
+void reactivate_fd(int fd, int irqnum)
 {
-	struct irq_fd *irq;
+	struct irq_entry *irq_entry;
 	unsigned long flags;
-	int i;
-
 	spin_lock_irqsave(&irq_lock, flags);
-	irq = find_irq_by_fd(fd, irqnum, &i);
-	if (irq == NULL) {
-		spin_unlock_irqrestore(&irq_lock, flags);
-		return;
+	for (irq_entry = active_fds; irq_entry != NULL; irq_entry = irq_entry->next) {
+		if (irq_entry->fd == fd) {
+			update_events(irq_entry);
+		}
 	}
-
-	os_set_pollfd(i, -1);
 	spin_unlock_irqrestore(&irq_lock, flags);
+
+}
 
-	ignore_sigio_fd(fd);
+void deactivate_fd(int fd, int irqnum)
+{
+	os_del_epoll_fd(fd);   /* ignore err, just do it */
 }
 EXPORT_SYMBOL(deactivate_fd);
 
@@ -265,17 +337,17 @@ EXPORT_SYMBOL(deactivate_fd);
  */
 int deactivate_all_fds(void)
 {
-	struct irq_fd *irq;
+	struct irq_entry * irq_entry;
 	int err;
 
-	for (irq = active_fds; irq != NULL; irq = irq->next) {
-		err = os_clear_fd_async(irq->fd);
-		if (err)
-			return err;
+	for (irq_entry = active_fds; irq_entry != NULL; irq_entry = irq_entry->next) {
+		os_del_epoll_fd(irq_entry->fd);   /* ignore err, just do it */
+		err = os_clear_fd_async(irq_entry->fd);
+		if (err) {
+			printk(KERN_ERR "Clear FD async failed with %d", err);
+		}
 	}
-	/* If there is a signal already queued, after unblocking ignore it */
-	os_set_ioignore();
-
+	os_close_epoll();
 	return 0;
 }
 
@@ -308,13 +380,13 @@ int um_request_irq(unsigned int irq, int fd, int type,
 {
 	int err;
 
-	if (fd != -1) {
+	err = request_irq(irq, handler, irqflags, devname, dev_id);
+
+	if ((!err) && (fd != -1)) {
 		err = activate_fd(irq, fd, type, dev_id);
-		if (err)
-			return err;
 	}
 
-	return request_irq(irq, handler, irqflags, devname, dev_id);
+	return err;
 }
 
 EXPORT_SYMBOL(um_request_irq);
@@ -352,9 +424,9 @@ void __init init_IRQ(void)
 	int i;
 
 	irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
-
-	for (i = 1; i < NR_IRQS; i++)
+	for (i = 1; i < NR_IRQS - 1 ; i++)
 		irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
+	os_setup_epoll(MAX_EPOLL_EVENTS);
 }
 
 /*
@@ -382,11 +454,11 @@ void __init init_IRQ(void)
  * thread_info.
  *
  * There are three cases -
- *     The first interrupt on the stack - sets up the thread_info and
+ *	 The first interrupt on the stack - sets up the thread_info and
  * handles the interrupt
- *     A nested interrupt interrupting the copying of the thread_info -
+ *	 A nested interrupt interrupting the copying of the thread_info -
  * can't handle the interrupt, as the stack is in an unknown state
- *     A nested interrupt not interrupting the copying of the
+ *	 A nested interrupt not interrupting the copying of the
  * thread_info - doesn't do any setup, just handles the interrupt
  *
  * The first job is to figure out whether we interrupted stack setup.
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index b9afb74..81b135a 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
@@ -6,6 +7,7 @@
 #include <stdlib.h>
 #include <errno.h>
 #include <poll.h>
+#include <sys/epoll.h>
 #include <signal.h>
 #include <string.h>
 #include <irq_user.h>
@@ -16,120 +18,88 @@
  * Locked by irq_lock in arch/um/kernel/irq.c.  Changed by os_create_pollfd
  * and os_free_irq_by_cb, which are called under irq_lock.
  */
-static struct pollfd *pollfds = NULL;
-static int pollfds_num = 0;
-static int pollfds_size = 0;
 
-int os_waiting_for_events(struct irq_fd *active_fds)
+/* epoll support */
+
+
+static int epollfd = -1;
+
+int os_setup_epoll(int maxevents) {
+	epollfd = epoll_create(maxevents);
+	return epollfd;
+}
+
+int os_waiting_for_events_epoll(void *kernel_events, int maxevents)
 {
-	struct irq_fd *irq_fd;
-	int i, n, err;
+	int n, err;
 
-	n = poll(pollfds, pollfds_num, 0);
+	n = epoll_wait(epollfd,
+		(struct epoll_event *) kernel_events, maxevents, 0);
 	if (n < 0) {
 		err = -errno;
 		if (errno != EINTR)
-			printk(UM_KERN_ERR "os_waiting_for_events:"
-			       " poll returned %d, errno = %d\n", n, errno);
+			printk(
+				UM_KERN_ERR "os_waiting_for_events:"
+				" poll returned %d, error = %s\n", n,
+				strerror(errno)
+			);
 		return err;
 	}
 
-	if (n == 0)
-		return 0;
+	return n;
+}
 
-	irq_fd = active_fds;
+int os_add_epoll_fd (int events, int fd, void * data) {
+	struct epoll_event event;
+	int result;
 
-	for (i = 0; i < pollfds_num; i++) {
-		if (pollfds[i].revents != 0) {
-			irq_fd->current_events = pollfds[i].revents;
-			pollfds[i].fd = -1;
-		}
-		irq_fd = irq_fd->next;
+	event.data.ptr = data;
+	event.events = events | EPOLLET;
+	result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
+	if ((result) && (errno == EEXIST)) {
+		result = os_mod_epoll_fd (events, fd, data);
 	}
-	return n;
+	if (result) {
+		printk("epollctl add err fd %d, %s\n", fd, strerror(errno));
+	}
+	return result;
 }
 
-int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds)
-{
-	if (pollfds_num == pollfds_size) {
-		if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) {
-			/* return min size needed for new pollfds area */
-			return (pollfds_size + 1) * sizeof(pollfds[0]);
-		}
-
-		if (pollfds != NULL) {
-			memcpy(tmp_pfd, pollfds,
-			       sizeof(pollfds[0]) * pollfds_size);
-			/* remove old pollfds */
-			kfree(pollfds);
-		}
-		pollfds = tmp_pfd;
-		pollfds_size++;
-	} else
-		kfree(tmp_pfd);	/* remove not used tmp_pfd */
-
-	pollfds[pollfds_num] = ((struct pollfd) { .fd		= fd,
-						  .events	= events,
-						  .revents	= 0 });
-	pollfds_num++;
-
-	return 0;
+int os_mod_epoll_fd (int events, int fd, void * data) {
+	struct epoll_event event;
+	int result;
+	event.data.ptr = data;
+	event.events = events;
+	result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event);
+	if (result) {
+		printk("epollctl mod err fd %d, %s\n", fd, strerror(errno));
+	}
+	return result;
 }
 
-void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
-		struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2)
-{
-	struct irq_fd **prev;
-	int i = 0;
-
-	prev = &active_fds;
-	while (*prev != NULL) {
-		if ((*test)(*prev, arg)) {
-			struct irq_fd *old_fd = *prev;
-			if ((pollfds[i].fd != -1) &&
-			    (pollfds[i].fd != (*prev)->fd)) {
-				printk(UM_KERN_ERR "os_free_irq_by_cb - "
-				       "mismatch between active_fds and "
-				       "pollfds, fd %d vs %d\n",
-				       (*prev)->fd, pollfds[i].fd);
-				goto out;
-			}
-
-			pollfds_num--;
-
-			/*
-			 * This moves the *whole* array after pollfds[i]
-			 * (though it doesn't spot as such)!
-			 */
-			memmove(&pollfds[i], &pollfds[i + 1],
-			       (pollfds_num - i) * sizeof(pollfds[0]));
-			if (*last_irq_ptr2 == &old_fd->next)
-				*last_irq_ptr2 = prev;
-
-			*prev = (*prev)->next;
-			if (old_fd->type == IRQ_WRITE)
-				ignore_sigio_fd(old_fd->fd);
-			kfree(old_fd);
-			continue;
-		}
-		prev = &(*prev)->next;
-		i++;
+int os_del_epoll_fd (int fd) {
+	struct epoll_event event;
+	int result;
+	result = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event);
+	if (result) {
+		printk("epollctl del err %s\n", strerror(errno));
 	}
- out:
-	return;
+	return result;
 }
 
-int os_get_pollfd(int i)
+void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
+		struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2)
 {
-	return pollfds[i].fd;
+	printk("Someone invoking obsolete deactivate_by_CB!!!\n");
+	return;
 }
 
-void os_set_pollfd(int i, int fd)
+void os_set_ioignore(void)
 {
-	pollfds[i].fd = fd;
+	signal(SIGIO, SIG_IGN);
 }
 
-void os_set_ioignore(void)
+void os_close_epoll(void)
 {
-	signal(SIGIO, SIG_IGN);
+	os_close_file(epollfd);
 }
-- 
2.1.4


------------------------------------------------------------------------------
Presto, an open source distributed SQL query engine for big data, initially
developed by Facebook, enables you to easily query your data on Hadoop in a 
more interactive manner. Teradata is also now providing full enterprise
support for Presto. Download a free open source copy now.
http://pubads.g.doubleclick.net/gampad/clk?id=250295911&iu=/4140
_______________________________________________
User-mode-linux-devel mailing list
User-mode-linux-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel


             reply	other threads:[~2015-11-09 14:33 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-09 14:33 Anton Ivanov [this message]
2015-11-09 15:03 ` [uml-devel] [PATCH v2] EPOLL Interrupt Controller V2.0 Anton Ivanov
2015-11-10 18:42   ` Anton Ivanov
2015-11-10 20:24     ` Richard Weinberger
2015-11-11 20:46   ` Thomas Meyer
2015-11-11 21:05     ` Richard Weinberger
2015-11-11 21:39       ` Anton Ivanov
2015-11-11 21:49         ` stian
2015-11-11 23:25           ` Anton Ivanov
2015-11-12 12:29       ` Anton Ivanov
2015-11-12 15:23         ` Anton Ivanov
2015-11-12 16:03           ` Anton Ivanov
2015-11-16  8:09             ` Anton Ivanov
2015-11-18  8:33               ` Anton Ivanov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1447079597-17816-1-git-send-email-aivanov@brocade.com \
    --to=aivanov@brocade.com \
    --cc=user-mode-linux-devel@lists.sourceforge.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.