linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Manfred <manfred@colorfullife.com>
To: linux-kernel@vger.kernel.org
Subject: [PATCH] up to 50% faster sys_poll()
Date: Fri, 05 Jan 2001 14:28:59 +0100	[thread overview]
Message-ID: <3A55CC1B.602719B3@colorfullife.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 463 bytes --]

sys_poll spends around 1/2 of the execution time allocating / freeing a
few bytes temporary memory.

The attached patch tries to avoid these allocation by using the stack -
usually only a few bytes are needed, kmalloc is used for the rest.

The result: one poll of stdin is down from 1736 cpu ticks to 865 cpu
ticks (Pentium II/350 SMP, SMP kernel)

select() should also be faster, but I haven't written a micro-benchmark
for select.

Please test it,

--
	Manfred

[-- Attachment #2: patch-poll --]
[-- Type: text/plain, Size: 9912 bytes --]

// $Header$
// Kernel Version:
//  VERSION = 2
//  PATCHLEVEL = 4
//  SUBLEVEL = 0
//  EXTRAVERSION =
--- 2.4/fs/select.c	Thu Nov 16 21:54:18 2000
+++ build-2.4/fs/select.c	Fri Jan  5 13:46:30 2001
@@ -24,12 +24,6 @@
 #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
 #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
 
-struct poll_table_entry {
-	struct file * filp;
-	wait_queue_t wait;
-	wait_queue_head_t * wait_address;
-};
-
 struct poll_table_page {
 	struct poll_table_page * next;
 	struct poll_table_entry * entry;
@@ -52,11 +46,36 @@
  * poll table.
  */
 
+/*
+ * Memory free and alloc took a significant part of the total
+ * sys_poll()/sys_select() execution time, thus I moved several
+ * structures on the stack:
+ * - sys_select has a 192 byte (enough for 256 fds) buffer on the stack.
+ *   Please avoid selecting more than 5000 descriptors
+ *   (kmalloc > 4096 bytes), and you can't select
+ *   more than 170.000 fds (kmalloc > 128 kB)
+ * - sys_poll stores the first 24 file descriptors on the
+ *   stack. If more than 24 descriptors are polled, then
+ *   additional memory is allocated, but the first 24 descriptors
+ *   always lie on the stack.
+ * - the poll table contains 8 wait queue entries. This means that no dynamic
+ *   memory allocation is necessary for the wait queues if one of the first
+ *   8 file descriptors has new data.
+ * <manfred@colorfullife.com>
+ */
+
 void poll_freewait(poll_table* pt)
 {
 	struct poll_table_page * p = pt->table;
+	struct poll_table_entry * entry;
+	entry = pt->internal + pt->nr;
+	while(pt->nr > 0) {
+		pt->nr--;
+		entry--;
+		remove_wait_queue(entry->wait_address,&entry->wait);
+		fput(entry->filp);
+	}
 	while (p) {
-		struct poll_table_entry * entry;
 		struct poll_table_page *old;
 
 		entry = p->entry;
@@ -73,33 +92,36 @@
 
 void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
 {
-	struct poll_table_page *table = p->table;
-
-	if (!table || POLL_TABLE_FULL(table)) {
-		struct poll_table_page *new_table;
+	struct poll_table_entry * entry;
 
-		new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
-		if (!new_table) {
-			p->error = -ENOMEM;
-			__set_current_state(TASK_RUNNING);
-			return;
+	if(p->nr < POLL_TABLE_INTERNAL) {
+		entry = p->internal+p->nr++;
+	} else {
+		struct poll_table_page *table = p->table;
+
+		if (!table || POLL_TABLE_FULL(table)) {
+			struct poll_table_page *new_table;
+
+			new_table = kmalloc(PAGE_SIZE, GFP_KERNEL);
+			if (!new_table) {
+				p->error = -ENOMEM;
+				__set_current_state(TASK_RUNNING);
+				return;
+			}
+			new_table->entry = new_table->entries;
+			new_table->next = table;
+			p->table = new_table;
+			table = new_table;
 		}
-		new_table->entry = new_table->entries;
-		new_table->next = table;
-		p->table = new_table;
-		table = new_table;
-	}
-
-	/* Add a new entry */
-	{
-		struct poll_table_entry * entry = table->entry;
+		entry = table->entry;
 		table->entry = entry+1;
-	 	get_file(filp);
-	 	entry->filp = filp;
-		entry->wait_address = wait_address;
-		init_waitqueue_entry(&entry->wait, current);
-		add_wait_queue(wait_address,&entry->wait);
 	}
+	/* Add a new entry */
+	get_file(filp);
+	entry->filp = filp;
+	entry->wait_address = wait_address;
+	init_waitqueue_entry(&entry->wait, current);
+	add_wait_queue(wait_address,&entry->wait);
 }
 
 #define __IN(fds, n)		(fds->in + n)
@@ -233,14 +255,18 @@
 	return retval;
 }
 
-static void *select_bits_alloc(int size)
+#define SELECT_INLINE_BYTES	32
+static inline void *select_bits_alloc(int size, void* internal)
 {
+	if(size <= SELECT_INLINE_BYTES)
+		return internal;
 	return kmalloc(6 * size, GFP_KERNEL);
 }
 
-static void select_bits_free(void *bits, int size)
+static inline void select_bits_free(void *bits, void* internal)
 {
-	kfree(bits);
+	if(bits != internal)
+		kfree(bits);
 }
 
 /*
@@ -254,10 +280,12 @@
 #define MAX_SELECT_SECONDS \
 	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
 
+
 asmlinkage long
 sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
 {
 	fd_set_bits fds;
+	char ibuf[6*SELECT_INLINE_BYTES];
 	char *bits;
 	long timeout;
 	int ret, size;
@@ -295,7 +323,7 @@
 	 */
 	ret = -ENOMEM;
 	size = FDS_BYTES(n);
-	bits = select_bits_alloc(size);
+	bits = select_bits_alloc(size, ibuf);
 	if (!bits)
 		goto out_nofds;
 	fds.in      = (unsigned long *)  bits;
@@ -340,12 +368,18 @@
 	set_fd_set(n, exp, fds.res_ex);
 
 out:
-	select_bits_free(bits, size);
+	select_bits_free(bits, ibuf);
 out_nofds:
 	return ret;
 }
 
-#define POLLFD_PER_PAGE  ((PAGE_SIZE) / sizeof(struct pollfd))
+struct poll_list {
+	struct poll_list *next;
+	int len;
+	struct pollfd entries[0];
+};
+
+#define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
 
 static void do_pollfd(unsigned int num, struct pollfd * fdpage,
 	poll_table ** pwait, int *count)
@@ -379,39 +413,44 @@
 	}
 }
 
-static int do_poll(unsigned int nfds, unsigned int nchunks, unsigned int nleft, 
-	struct pollfd *fds[], poll_table *wait, long timeout)
+static int do_poll(int nfds, struct poll_list *list,
+			poll_table *wait, long timeout)
 {
-	int count;
+	int count = 0;
 	poll_table* pt = wait;
-
+ 
 	for (;;) {
-		unsigned int i;
-
+		struct poll_list* walk;
 		set_current_state(TASK_INTERRUPTIBLE);
-		count = 0;
-		for (i=0; i < nchunks; i++)
-			do_pollfd(POLLFD_PER_PAGE, fds[i], &pt, &count);
-		if (nleft)
-			do_pollfd(nleft, fds[nchunks], &pt, &count);
+		walk = list;
+		while(walk != NULL) {
+			do_pollfd( walk->len, walk->entries, &pt, &count);
+			walk = walk->next;
+		}
 		pt = NULL;
 		if (count || !timeout || signal_pending(current))
 			break;
 		count = wait->error;
 		if (count)
 			break;
+
 		timeout = schedule_timeout(timeout);
 	}
 	current->state = TASK_RUNNING;
 	return count;
 }
 
+#define INLINE_POLL_COUNT	24
 asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
 {
-	int i, j, fdcount, err;
-	struct pollfd **fds;
+	int fdcount, err;
+	unsigned int i;
+	struct poll_list *pollwalk;
+	struct {
+		struct poll_list head;
+		struct pollfd entries[INLINE_POLL_COUNT];
+	} polldata;
 	poll_table table, *wait;
-	int nchunks, nleft;
 
 	/* Do a sanity check on nfds ... */
 	if (nfds > current->files->max_fds)
@@ -431,63 +470,65 @@
 		wait = NULL;
 
 	err = -ENOMEM;
-	fds = NULL;
-	if (nfds != 0) {
-		fds = (struct pollfd **)kmalloc(
-			(1 + (nfds - 1) / POLLFD_PER_PAGE) * sizeof(struct pollfd *),
-			GFP_KERNEL);
-		if (fds == NULL)
-			goto out;
-	}
+	polldata.head.next = NULL;
+	polldata.head.len = INLINE_POLL_COUNT;
+	if(nfds <= INLINE_POLL_COUNT)
+		polldata.head.len = nfds;
 
-	nchunks = 0;
-	nleft = nfds;
-	while (nleft > POLLFD_PER_PAGE) { /* allocate complete PAGE_SIZE chunks */
-		fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
-		if (fds[nchunks] == NULL)
+	pollwalk = &polldata.head;
+	i = nfds;
+	err = -ENOMEM;
+	goto start;
+	while(i!=0) {
+		struct poll_list *pp;
+		pp = kmalloc(sizeof(struct poll_list)+
+				sizeof(struct pollfd)*
+				(i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i),
+					GFP_KERNEL);
+		if(pp==NULL)
 			goto out_fds;
-		nchunks++;
-		nleft -= POLLFD_PER_PAGE;
-	}
-	if (nleft) { /* allocate last PAGE_SIZE chunk, only nleft elements used */
-		fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
-		if (fds[nchunks] == NULL)
+		pp->next=NULL;
+		pp->len = (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i);
+		pollwalk->next = pp;
+		pollwalk = pp;
+start:
+		if (copy_from_user(pollwalk+1, ufds + nfds-i, 
+				sizeof(struct pollfd)*pollwalk->len)) {
+			err = -EFAULT;
 			goto out_fds;
+		}
+		i -= pollwalk->len;
 	}
+		
+	fdcount = do_poll(nfds, &polldata.head,
+			wait, timeout);
 
+	/* OK, now copy the revents fields back to user space. */
+	i = nfds;
+	pollwalk = &polldata.head;
 	err = -EFAULT;
-	for (i=0; i < nchunks; i++)
-		if (copy_from_user(fds[i], ufds + i*POLLFD_PER_PAGE, PAGE_SIZE))
-			goto out_fds1;
-	if (nleft) {
-		if (copy_from_user(fds[nchunks], ufds + nchunks*POLLFD_PER_PAGE, 
-				nleft * sizeof(struct pollfd)))
-			goto out_fds1;
+	while(pollwalk != NULL) {
+		struct pollfd * fds = pollwalk->entries;
+		int j;
+
+		for (j=0; j < pollwalk->len; j++, ufds++) {
+			if(__put_user(fds[j].revents, &ufds->revents))
+				goto out_fds;
+		}
+		i -= pollwalk->len;
+		pollwalk = pollwalk->next;
 	}
-
-	fdcount = do_poll(nfds, nchunks, nleft, fds, wait, timeout);
-
-	/* OK, now copy the revents fields back to user space. */
-	for(i=0; i < nchunks; i++)
-		for (j=0; j < POLLFD_PER_PAGE; j++, ufds++)
-			__put_user((fds[i] + j)->revents, &ufds->revents);
-	if (nleft)
-		for (j=0; j < nleft; j++, ufds++)
-			__put_user((fds[nchunks] + j)->revents, &ufds->revents);
-
 	err = fdcount;
 	if (!fdcount && signal_pending(current))
 		err = -EINTR;
 
-out_fds1:
-	if (nleft)
-		free_page((unsigned long)(fds[nchunks]));
 out_fds:
-	for (i=0; i < nchunks; i++)
-		free_page((unsigned long)(fds[i]));
-	if (nfds != 0)
-		kfree(fds);
-out:
+	pollwalk = polldata.head.next;
+	while(pollwalk!=NULL) {
+		struct poll_list *pp = pollwalk->next;
+		kfree(pollwalk);
+		pollwalk = pp;
+	}
 	poll_freewait(&table);
 	return err;
 }
--- 2.4/include/linux/poll.h	Thu Jan  4 23:51:10 2001
+++ build-2.4/include/linux/poll.h	Fri Jan  5 13:46:30 2001
@@ -12,9 +12,18 @@
 
 struct poll_table_page;
 
+struct poll_table_entry {
+	struct file * filp;
+	wait_queue_t wait;
+	wait_queue_head_t * wait_address;
+};
+
+#define POLL_TABLE_INTERNAL	8
 typedef struct poll_table_struct {
 	int error;
+	int nr;
 	struct poll_table_page * table;
+	struct poll_table_entry internal[POLL_TABLE_INTERNAL];
 } poll_table;
 
 extern void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p);
@@ -28,6 +37,7 @@
 static inline void poll_initwait(poll_table* pt)
 {
 	pt->error = 0;
+	pt->nr = 0;
 	pt->table = NULL;
 }
 extern void poll_freewait(poll_table* pt);


             reply	other threads:[~2001-01-05 13:29 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2001-01-05 13:28 Manfred [this message]
2001-01-06 21:58 [PATCH] up to 50% faster sys_poll() Dan Kegel
     [not found] ` <3A57A668.7D4D8E46@colorfullife.com>
2001-01-07  0:13   ` Dan Kegel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3A55CC1B.602719B3@colorfullife.com \
    --to=manfred@colorfullife.com \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).