* [PATCH] pepoll_wait ...
@ 2006-01-18 3:36 Davide Libenzi
2006-01-18 4:20 ` Ulrich Drepper
2006-01-18 18:38 ` Michael Tokarev
0 siblings, 2 replies; 11+ messages in thread
From: Davide Libenzi @ 2006-01-18 3:36 UTC (permalink / raw)
To: Linux Kernel Mailing List
Cc: David Miller, Ulrich Drepper, Andrew Morton, David Woodhouse
[-- Attachment #1: Type: TEXT/PLAIN, Size: 1206 bytes --]
The attached patch implements the pepoll_wait system call, which extends the
event wait mechanism with the same logic that ppoll and pselect use. The definition
of pepoll_wait is:
int pepoll_wait(int epfd, struct epoll_event *events, int maxevents,
int timeout, const sigset_t *sigmask, size_t sigsetsize);
The difference between the vanilla epoll_wait and pepoll_wait is that the
latter allows the caller to specify a signal mask to be set while waiting for
events. Hence pepoll_wait will wait until either a monitored event or an
unmasked signal occurs. If sigmask is NULL, the pepoll_wait system call will
act exactly like epoll_wait. For the POSIX definition of pselect, information
is available here:
http://www.opengroup.org/onlinepubs/009695399/functions/select.html
This patch applies on top of 2.6.15-mm4 and depends on the TIF_RESTORE_SIGMASK bits.
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
- Davide
arch/i386/kernel/syscall_table.S | 1
fs/eventpoll.c | 58 ++++++++++++++++++++++++++++++++++-----
include/asm-i386/unistd.h | 3 +-
include/linux/syscalls.h | 3 ++
4 files changed, 57 insertions(+), 8 deletions(-)
[-- Attachment #2: Type: TEXT/PLAIN, Size: 5351 bytes --]
diff -Nru linux-2.6.15/arch/i386/kernel/syscall_table.S linux-2.6.15.mod/arch/i386/kernel/syscall_table.S
--- linux-2.6.15/arch/i386/kernel/syscall_table.S 2006-01-17 17:11:12.000000000 -0800
+++ linux-2.6.15.mod/arch/i386/kernel/syscall_table.S 2006-01-17 17:11:59.000000000 -0800
@@ -310,3 +310,4 @@
.long sys_pselect6
.long sys_ppoll
.long sys_unshare /* 310 */
+ .long sys_pepoll_wait
diff -Nru linux-2.6.15/fs/eventpoll.c linux-2.6.15.mod/fs/eventpoll.c
--- linux-2.6.15/fs/eventpoll.c 2006-01-17 17:11:14.000000000 -0800
+++ linux-2.6.15.mod/fs/eventpoll.c 2006-01-17 17:11:59.000000000 -0800
@@ -105,6 +105,8 @@
/* Maximum msec timeout value storeable in a long int */
#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
+#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
+
struct epoll_filefd {
struct file *file;
@@ -649,24 +651,25 @@
return error;
}
-#define MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
/*
* Implement the event wait interface for the eventpoll file. It is the kernel
- * part of the user space epoll_wait(2).
+ * part of the user space pepoll_wait(2).
*/
-asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
- int maxevents, int timeout)
+asmlinkage long sys_pepoll_wait(int epfd, struct epoll_event __user *events,
+ int maxevents, int timeout, const sigset_t __user *sigmask,
+ size_t sigsetsize)
{
int error;
+ sigset_t ksigmask, sigsaved;
struct file *file;
struct eventpoll *ep;
- DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d)\n",
+ DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_pepoll_wait(%d, %p, %d, %d)\n",
current, epfd, events, maxevents, timeout));
/* The maximum number of event must be greater than zero */
- if (maxevents <= 0 || maxevents > MAX_EVENTS)
+ if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
return -EINVAL;
/* Verify that the area passed by the user is writeable */
@@ -695,13 +698,41 @@
*/
ep = file->private_data;
+ /*
+ * If the caller wants a certain signal mask to be set during the wait,
+ * we apply it here.
+ */
+ if (sigmask) {
+ if (sigsetsize != sizeof(sigset_t))
+ goto eexit_2;
+ error = -EFAULT;
+ if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+ goto eexit_2;
+ sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+ sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+ }
+
/* Time to fish for events ... */
error = ep_poll(ep, events, maxevents, timeout);
+ /*
+ * If we changed the signal mask, we need to restore the original one.
+ * In case we've got a signal while waiting, we do not restore the signal
+ * mask yet, and we allow do_signal() to deliver the signal on the way back
+ * to userspace, before the signal mask is restored.
+ */
+ if (error == -EINTR) {
+ if (sigmask) {
+ memcpy(&current->saved_sigmask, &sigsaved, sizeof(sigsaved));
+ set_thread_flag(TIF_RESTORE_SIGMASK);
+ }
+ } else if (sigmask)
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
eexit_2:
fput(file);
eexit_1:
- DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n",
+ DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_pepoll_wait(%d, %p, %d, %d) = %d\n",
current, epfd, events, maxevents, timeout, error));
return error;
@@ -709,6 +740,19 @@
/*
+ * Implement the event wait interface for the eventpoll file. It is the kernel
+ * part of the user space epoll_wait(2). This just calls the super-sister
+ * sys_pepoll_wait() without signal parameters.
+ */
+asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
+ int maxevents, int timeout)
+{
+
+ return sys_pepoll_wait(epfd, events, maxevents, timeout, NULL, 0);
+}
+
+
+/*
* Creates the file descriptor to be used by the epoll interface.
*/
static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
diff -Nru linux-2.6.15/include/asm-i386/unistd.h linux-2.6.15.mod/include/asm-i386/unistd.h
--- linux-2.6.15/include/asm-i386/unistd.h 2006-01-17 17:11:14.000000000 -0800
+++ linux-2.6.15.mod/include/asm-i386/unistd.h 2006-01-17 17:11:59.000000000 -0800
@@ -316,8 +316,9 @@
#define __NR_pselect6 308
#define __NR_ppoll 309
#define __NR_unshare 310
+#define __NR_pepoll_wait 311
-#define NR_syscalls 311
+#define NR_syscalls 312
/*
* user-visible error numbers are in the range -1 - -128: see
diff -Nru linux-2.6.15/include/linux/syscalls.h linux-2.6.15.mod/include/linux/syscalls.h
--- linux-2.6.15/include/linux/syscalls.h 2006-01-17 17:11:15.000000000 -0800
+++ linux-2.6.15.mod/include/linux/syscalls.h 2006-01-17 17:12:59.000000000 -0800
@@ -428,6 +428,9 @@
struct epoll_event __user *event);
asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
int maxevents, int timeout);
+asmlinkage long sys_pepoll_wait(int epfd, struct epoll_event __user *events,
+ int maxevents, int timeout, const sigset_t __user *sigmask,
+ size_t sigsetsize);
asmlinkage long sys_gethostname(char __user *name, int len);
asmlinkage long sys_sethostname(char __user *name, int len);
asmlinkage long sys_setdomainname(char __user *name, int len);
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] pepoll_wait ...
2006-01-18 3:36 [PATCH] pepoll_wait Davide Libenzi
@ 2006-01-18 4:20 ` Ulrich Drepper
2006-01-18 5:03 ` Andrew Morton
2006-01-18 7:38 ` Davide Libenzi
2006-01-18 18:38 ` Michael Tokarev
1 sibling, 2 replies; 11+ messages in thread
From: Ulrich Drepper @ 2006-01-18 4:20 UTC (permalink / raw)
To: Davide Libenzi
Cc: Linux Kernel Mailing List, David Miller, Andrew Morton, David Woodhouse
[-- Attachment #1: Type: text/plain, Size: 920 bytes --]
Davide Libenzi wrote:
> The attached patch implements the pepoll_wait system call, that extend
> the event wait mechanism with the same logic ppoll and pselect do. The
> definition of pepoll_wait is: [...]
I definitely ACK this patch, it's needed for the same reasons we need
pselect and ppoll.
> + if (error == -EINTR) {
> + if (sigmask) {
> + memcpy(&current->saved_sigmask, &sigsaved, sizeof(sigsaved));
> + set_thread_flag(TIF_RESTORE_SIGMASK);
> + }
> + } else if (sigmask)
> + sigprocmask(SIG_SETMASK, &sigsaved, NULL);
This part I'd clean up a bit, though. Move the if (sigmask) test to the
top and have the EINTR test decide what to do. As is the code would be
a bit irritating if it wouldn't be so trivial. The important thing is
that you only do something special if sigmask != NULL.
--
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 251 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] pepoll_wait ...
2006-01-18 4:20 ` Ulrich Drepper
@ 2006-01-18 5:03 ` Andrew Morton
2006-01-18 7:13 ` David Woodhouse
2006-01-18 7:40 ` Davide Libenzi
2006-01-18 7:38 ` Davide Libenzi
1 sibling, 2 replies; 11+ messages in thread
From: Andrew Morton @ 2006-01-18 5:03 UTC (permalink / raw)
To: Ulrich Drepper; +Cc: davidel, linux-kernel, davem, dwmw2
Ulrich Drepper <drepper@redhat.com> wrote:
>
> Davide Libenzi wrote:
> > The attached patch implements the pepoll_wait system call, that extend
> > the event wait mechanism with the same logic ppoll and pselect do. The
> > definition of pepoll_wait is: [...]
>
> I definitely ACK this patch, it's needed for the same reasons we need
> pselect and ppoll.
>
It busts most architectures.
fs/eventpoll.c: In function `sys_pepoll_wait':
fs/eventpoll.c:727: error: `TIF_RESTORE_SIGMASK' undeclared (first use in this function)
It seems that the preferred way to fix this is to sprinkle #ifdef
TIF_RESTORE_SIGMASK all over the code.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] pepoll_wait ...
2006-01-18 5:03 ` Andrew Morton
@ 2006-01-18 7:13 ` David Woodhouse
2006-01-18 7:40 ` Davide Libenzi
1 sibling, 0 replies; 11+ messages in thread
From: David Woodhouse @ 2006-01-18 7:13 UTC (permalink / raw)
To: Andrew Morton; +Cc: Ulrich Drepper, davidel, linux-kernel, davem
On Tue, 2006-01-17 at 21:03 -0800, Andrew Morton wrote:
> It seems that the preferred way to fix this is to sprinkle #ifdef
> TIF_RESTORE_SIGMASK all over the code.
That's intended to be a temporary 'fix' until all the other
architectures catch up. I don't want it there long-term.
--
dwmw2
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] pepoll_wait ...
2006-01-18 4:20 ` Ulrich Drepper
2006-01-18 5:03 ` Andrew Morton
@ 2006-01-18 7:38 ` Davide Libenzi
1 sibling, 0 replies; 11+ messages in thread
From: Davide Libenzi @ 2006-01-18 7:38 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Linux Kernel Mailing List, David Miller, Andrew Morton, David Woodhouse
On Tue, 17 Jan 2006, Ulrich Drepper wrote:
> Davide Libenzi wrote:
>> The attached patch implements the pepoll_wait system call, that extend
>> the event wait mechanism with the same logic ppoll and pselect do. The
>> definition of pepoll_wait is: [...]
>
> I definitely ACK this patch, it's needed for the same reasons we need
> pselect and ppoll.
>
>
>> + if (error == -EINTR) {
>> + if (sigmask) {
>> + memcpy(&current->saved_sigmask, &sigsaved, sizeof(sigsaved));
>> + set_thread_flag(TIF_RESTORE_SIGMASK);
>> + }
>> + } else if (sigmask)
>> + sigprocmask(SIG_SETMASK, &sigsaved, NULL);
>
> This part I'd clean up a bit, though. Move the if (sigmask) test to the
> top and have the EINTR test decide what to do. As is the code would be
> a bit irritating if it wouldn't be so trivial. The important thing is
> that you only do something special if sigmask != NULL.
Agreed.
- Davide
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] pepoll_wait ...
2006-01-18 5:03 ` Andrew Morton
2006-01-18 7:13 ` David Woodhouse
@ 2006-01-18 7:40 ` Davide Libenzi
2006-01-18 7:48 ` David Woodhouse
1 sibling, 1 reply; 11+ messages in thread
From: Davide Libenzi @ 2006-01-18 7:40 UTC (permalink / raw)
To: Andrew Morton
Cc: Ulrich Drepper, Linux Kernel Mailing List, David Miller, dwmw2
On Tue, 17 Jan 2006, Andrew Morton wrote:
> Ulrich Drepper <drepper@redhat.com> wrote:
>>
>> Davide Libenzi wrote:
>>> The attached patch implements the pepoll_wait system call, that extend
>>> the event wait mechanism with the same logic ppoll and pselect do. The
>>> definition of pepoll_wait is: [...]
>>
>> I definitely ACK this patch, it's needed for the same reasons we need
>> pselect and ppoll.
>>
>
> It busts most architectures.
>
> fs/eventpoll.c: In function `sys_pepoll_wait':
> fs/eventpoll.c:727: error: `TIF_RESTORE_SIGMASK' undeclared (first use in this function)
>
> It seems that the preferred way to fix this is to sprinkle #ifdef
> TIF_RESTORE_SIGMASK all over the code.
Hey, I've written in the comments that it depends on the
TIF_RESTORE_SIGMASK bits ;) The latest one that dwmw posted used such
feature, so I thought to align epoll bits to that too.
- Davide
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] pepoll_wait ...
2006-01-18 7:40 ` Davide Libenzi
@ 2006-01-18 7:48 ` David Woodhouse
2006-01-18 8:14 ` Davide Libenzi
0 siblings, 1 reply; 11+ messages in thread
From: David Woodhouse @ 2006-01-18 7:48 UTC (permalink / raw)
To: Davide Libenzi
Cc: Andrew Morton, Ulrich Drepper, Linux Kernel Mailing List, David Miller
On Tue, 2006-01-17 at 23:40 -0800, Davide Libenzi wrote:
> Hey, I've written in the comments that it depends on the
> TIF_RESTORE_SIGMASK bits ;) The latest one that dwmw posted used such
> feature, so I though to align epoll bits to that too.
The point is that TIF_RESTORE_SIGMASK needs to be implemented for each
architecture, and we only have it for powerpc, i386 and FR-V at the
moment. So in _generic_ files you have to use #ifdef TIF_RESTORE_SIGMASK
for now, until the other architectures catch up.
--
dwmw2
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] pepoll_wait ...
2006-01-18 7:48 ` David Woodhouse
@ 2006-01-18 8:14 ` Davide Libenzi
0 siblings, 0 replies; 11+ messages in thread
From: Davide Libenzi @ 2006-01-18 8:14 UTC (permalink / raw)
To: David Woodhouse
Cc: Andrew Morton, Ulrich Drepper, Linux Kernel Mailing List, David Miller
On Wed, 18 Jan 2006, David Woodhouse wrote:
> On Tue, 2006-01-17 at 23:40 -0800, Davide Libenzi wrote:
>> Hey, I've written in the comments that it depends on the
>> TIF_RESTORE_SIGMASK bits ;) The latest one that dwmw posted used such
>> feature, so I though to align epoll bits to that too.
>
> The point is that TIF_RESTORE_SIGMASK needs to be implemented for each
> architecture, and we only have it for powerpc, i386 and FR-V at the
> moment. So in _generic_ files you have to use #ifdef TIF_RESTORE_SIGMASK
> for now, until the other architectures catch up.
Ok, will do. You then let me know when all archs are aligned so that I can
nuke the #ifdef and use TIF_RESTORE_SIGMASK.
- Davide
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] pepoll_wait ...
2006-01-18 3:36 [PATCH] pepoll_wait Davide Libenzi
2006-01-18 4:20 ` Ulrich Drepper
@ 2006-01-18 18:38 ` Michael Tokarev
2006-01-18 18:51 ` Davide Libenzi
2006-01-18 19:07 ` Ulrich Drepper
1 sibling, 2 replies; 11+ messages in thread
From: Michael Tokarev @ 2006-01-18 18:38 UTC (permalink / raw)
To: Davide Libenzi
Cc: Linux Kernel Mailing List, David Miller, Ulrich Drepper,
Andrew Morton, David Woodhouse
Davide Libenzi wrote:
>
> The attached patch implements the pepoll_wait system call, that extend
> the event wait mechanism with the same logic ppoll and pselect do. The
> definition of pepoll_wait is:
>
> int pepoll_wait(int epfd, struct epoll_event *events, int maxevents,
> int timeout, const sigset_t *sigmask, size_t sigsetsize);
How about epoll_pwait() instead? It looks more appropriate, for
my eyes anyway. (Just a name, nothing more)
/mjt
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] pepoll_wait ...
2006-01-18 18:38 ` Michael Tokarev
@ 2006-01-18 18:51 ` Davide Libenzi
2006-01-18 19:07 ` Ulrich Drepper
1 sibling, 0 replies; 11+ messages in thread
From: Davide Libenzi @ 2006-01-18 18:51 UTC (permalink / raw)
To: Michael Tokarev
Cc: Linux Kernel Mailing List, David Miller, Ulrich Drepper,
Andrew Morton, David Woodhouse
On Wed, 18 Jan 2006, Michael Tokarev wrote:
> Davide Libenzi wrote:
>>
>> The attached patch implements the pepoll_wait system call, that extend the
>> event wait mechanism with the same logic ppoll and pselect do. The
>> definition of pepoll_wait is:
>>
>> int pepoll_wait(int epfd, struct epoll_event *events, int maxevents,
>> int timeout, const sigset_t *sigmask, size_t sigsetsize);
>
> How about epoll_pwait() instead? It looks more appropriate, for
> my eyes anyway. (Just a name, nothing more)
Thinking about it, it looks better to me too. I'll change it if there are
no other objections ...
- Davide
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] pepoll_wait ...
2006-01-18 18:38 ` Michael Tokarev
2006-01-18 18:51 ` Davide Libenzi
@ 2006-01-18 19:07 ` Ulrich Drepper
1 sibling, 0 replies; 11+ messages in thread
From: Ulrich Drepper @ 2006-01-18 19:07 UTC (permalink / raw)
To: Michael Tokarev
Cc: Davide Libenzi, Linux Kernel Mailing List, David Miller,
Andrew Morton, David Woodhouse
[-- Attachment #1: Type: text/plain, Size: 318 bytes --]
Michael Tokarev wrote:
> How about epoll_pwait() instead? It looks more appropriate, for
> my eyes anyway. (Just a name, nothing more)
It's just a name but I have thought along the same lines. It's just more
pleasing.
--
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 251 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2006-01-18 19:07 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-01-18 3:36 [PATCH] pepoll_wait Davide Libenzi
2006-01-18 4:20 ` Ulrich Drepper
2006-01-18 5:03 ` Andrew Morton
2006-01-18 7:13 ` David Woodhouse
2006-01-18 7:40 ` Davide Libenzi
2006-01-18 7:48 ` David Woodhouse
2006-01-18 8:14 ` Davide Libenzi
2006-01-18 7:38 ` Davide Libenzi
2006-01-18 18:38 ` Michael Tokarev
2006-01-18 18:51 ` Davide Libenzi
2006-01-18 19:07 ` Ulrich Drepper
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).