linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Add pselect, ppoll system calls.
@ 2005-06-10 22:58 David Woodhouse
  2005-06-12 22:48 ` Alan Cox
  0 siblings, 1 reply; 45+ messages in thread
From: David Woodhouse @ 2005-06-10 22:58 UTC (permalink / raw)
  To: linux-kernel, akpm, torvalds; +Cc: drepper

Says the man page...

The idea of pselect is that if one wants to wait for an event, either a
signal  or  something on a file descriptor, an atomic test is needed to
prevent race conditions. (Suppose the signal handler sets a global flag
and  returns.  Then  a  test  of this global flag followed by a call of
select() could hang indefinitely if the signal arrived just  after  the
test but just before the call. On the other hand, pselect allows one to
first block signals, handle the signals that have come  in,  then  call
pselect()  with  the  desired sigmask, avoiding the race.)  Since Linux
today does not have a pselect() system call, the current glibc2 routine
still contains this race.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>

Index: arch/i386/kernel/syscall_table.S
===================================================================
--- 589b4166efe6ab929ee25b20b2e89935efba504e/arch/i386/kernel/syscall_table.S  (mode:100644)
+++ uncommitted/arch/i386/kernel/syscall_table.S  (mode:100644)
@@ -289,3 +289,5 @@
 	.long sys_add_key
 	.long sys_request_key
 	.long sys_keyctl
+	.long sys_pselect6
+	.long sys_ppoll			/* 290 */
Index: arch/ppc/kernel/misc.S
===================================================================
--- 589b4166efe6ab929ee25b20b2e89935efba504e/arch/ppc/kernel/misc.S  (mode:100644)
+++ uncommitted/arch/ppc/kernel/misc.S  (mode:100644)
@@ -1441,3 +1441,5 @@
 	.long sys_request_key		/* 270 */
 	.long sys_keyctl
 	.long sys_waitid
+	.long sys_pselect6
+	.long sys_ppoll
Index: arch/ppc64/kernel/misc.S
===================================================================
--- 589b4166efe6ab929ee25b20b2e89935efba504e/arch/ppc64/kernel/misc.S  (mode:100644)
+++ uncommitted/arch/ppc64/kernel/misc.S  (mode:100644)
@@ -953,10 +953,12 @@
 	.llong .compat_sys_mq_getsetattr
 	.llong .sys_ni_syscall		/* 268 reserved for sys_kexec_load */
 	.llong .sys32_add_key
-	.llong .sys32_request_key
+	.llong .sys32_request_key	/* 270 */
 	.llong .compat_sys_keyctl
 	.llong .compat_sys_waitid
-
+	.llong .compat_sys_pselect6
+	.llong .compat_sys_ppoll
+	
 	.balign 8
 _GLOBAL(sys_call_table)
 	.llong .sys_restart_syscall	/* 0 */
@@ -1232,3 +1234,5 @@
 	.llong .sys_request_key		/* 270 */
 	.llong .sys_keyctl
 	.llong .sys_waitid
+	.llong .sys_pselect6
+	.llong .sys_ppoll
Index: arch/ppc64/kernel/signal32.c
===================================================================
--- 589b4166efe6ab929ee25b20b2e89935efba504e/arch/ppc64/kernel/signal32.c  (mode:100644)
+++ uncommitted/arch/ppc64/kernel/signal32.c  (mode:100644)
@@ -112,17 +112,6 @@
 	}
 }
 
-static inline void sigset_from_compat(sigset_t *set, compat_sigset_t *compat)
-{
-	switch (_NSIG_WORDS) {
-	case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32);
-	case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32);
-	case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32);
-	case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32);
-	}
-}
-
-
 /*
  * Save the current user registers on the user stack.
  * We only save the altivec registers if the process has used
Index: fs/compat.c
===================================================================
--- 589b4166efe6ab929ee25b20b2e89935efba504e/fs/compat.c  (mode:100644)
+++ uncommitted/fs/compat.c  (mode:100644)
@@ -1782,6 +1782,76 @@
 	return ret;
 }
 
+asmlinkage long
+compat_sys_pselect7(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp,
+		    compat_ulong_t __user *exp, struct compat_timeval __user *tvp,
+		    compat_sigset_t __user *sigmask, compat_size_t sigsetsize)
+{
+	compat_sigset_t s32;
+	sigset_t ksigmask, sigsaved;
+	long ret;
+
+	if (sigmask) {
+		if (sigsetsize != sizeof(compat_sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&s32, sigmask, sizeof(s32)))
+			return -EFAULT;
+		sigset_from_compat(&ksigmask, &s32);
+
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+	ret = compat_sys_select(n, inp, outp, exp, tvp);
+
+	if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return ret;
+}
+
+asmlinkage long
+compat_sys_pselect6(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp,
+		   compat_ulong_t __user *exp, struct compat_timeval __user *tvp,
+		    void __user *sig)
+{
+	compat_size_t sigsetsize = 0;
+	compat_uptr_t up = 0;
+
+	if (sig) {
+		if (!access_ok(VERIFY_READ, sig, sizeof(compat_uptr_t) + sizeof(compat_size_t))
+		    || __get_user(up, (compat_uptr_t __user *)sig)
+		    || __get_user(sigsetsize, (compat_size_t __user *)(sig+sizeof(up))))
+			return -EFAULT;
+	}
+	return compat_sys_pselect7(n, inp, outp, exp, tvp, compat_ptr(up), sigsetsize);
+}
+
+asmlinkage long
+compat_sys_ppoll(struct pollfd __user * ufds, unsigned int nfds, compat_ulong_t timeout, 
+		 const compat_sigset_t __user *sigmask, compat_size_t sigsetsize)
+{
+	compat_sigset_t s32;
+	sigset_t ksigmask, sigsaved;
+	long ret;
+
+	if (sigmask) {
+		if (sigsetsize |= sizeof(compat_sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&s32, sigmask, sizeof(s32)))
+			return -EFAULT;
+		sigset_from_compat(&ksigmask, &s32);
+
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+	ret = sys_poll(ufds, nfds, timeout);
+
+	if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return ret;	
+}
+
 #if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
 /* Stuff for NFS server syscalls... */
 struct compat_nfsctl_svc {
Index: fs/select.c
===================================================================
--- 589b4166efe6ab929ee25b20b2e89935efba504e/fs/select.c  (mode:100644)
+++ uncommitted/fs/select.c  (mode:100644)
@@ -388,12 +388,61 @@
 	return ret;
 }
 
+asmlinkage long
+sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp, const sigset_t __user *sigmask, size_t sigsetsize)
+{
+	sigset_t ksigmask, sigsaved;
+	long ret;
+
+
+	if (sigmask) {
+		/* XXX: Don't preclude handling different sized sigset_t's.  */
+		if (sigsetsize != sizeof(sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+			return -EFAULT;
+
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+	ret = sys_select(n, inp, outp, exp, tvp);
+
+	if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	return ret;
+}
+
 struct poll_list {
 	struct poll_list *next;
 	int len;
 	struct pollfd entries[0];
 };
 
+/* Most architectures can't handle 7-argument syscalls. So we provide
+   a 6-argument version where the sixth argument is a pointer to a
+   structure which has a uint32_t containing the sigset size, followed
+   by the sigset_t itself. This unfortunately means that libc's
+   pselect() call has to _copy_ the sigset into this structure which
+   it presumably allocates on the stack -- but it's only small, and it
+   means we don't have to screw around with 32/64-bit compatibility
+   layers as we would have to if the struct contained a _pointer_ to the
+   user's original sigset_t. */
+asmlinkage long
+sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp, void __user *sig)
+{
+	size_t sigsetsize = 0;
+	sigset_t __user *up = NULL;
+
+	if (sig) {
+		if (!access_ok(VERIFY_READ, sig, sizeof(void *) + sizeof(size_t))
+		    || __get_user(up, (sigset_t * __user *)sig)
+		    || __get_user(sigsetsize, (size_t * __user)(sig+sizeof(void *))))
+			return -EFAULT;
+	}
+	
+	return sys_pselect7(n, inp, outp, exp, tvp, up, sigsetsize);
+}
+
 #define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
 
 static void do_pollfd(unsigned int num, struct pollfd * fdpage,
@@ -534,3 +583,27 @@
 	poll_freewait(&table);
 	return err;
 }
+
+asmlinkage long sys_ppoll(struct pollfd __user * ufds, unsigned int nfds, long timeout, const sigset_t __user *sigmask, size_t sigsetsize)
+{
+	sigset_t ksigmask, sigsaved;
+	long ret;
+
+	if (sigmask) {
+		/* XXX: Don't preclude handling different sized sigset_t's.  */
+		if (sigsetsize != sizeof(sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+			return -EFAULT;
+
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	ret = sys_poll(ufds, nfds, timeout);
+
+	if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return ret;
+}
Index: include/asm-i386/unistd.h
===================================================================
--- 589b4166efe6ab929ee25b20b2e89935efba504e/include/asm-i386/unistd.h  (mode:100644)
+++ uncommitted/include/asm-i386/unistd.h  (mode:100644)
@@ -294,8 +294,10 @@
 #define __NR_add_key		286
 #define __NR_request_key	287
 #define __NR_keyctl		288
+#define __NR_pselect6		289
+#define __NR_ppoll		290
 
-#define NR_syscalls 289
+#define NR_syscalls 291
 
 /*
  * user-visible error numbers are in the range -1 - -128: see
Index: include/asm-ppc/unistd.h
===================================================================
--- 589b4166efe6ab929ee25b20b2e89935efba504e/include/asm-ppc/unistd.h  (mode:100644)
+++ uncommitted/include/asm-ppc/unistd.h  (mode:100644)
@@ -277,8 +277,10 @@
 #define __NR_request_key	270
 #define __NR_keyctl		271
 #define __NR_waitid		272
+#define __NR_pselect6		273
+#define __NR_ppoll		274
 
-#define __NR_syscalls		273
+#define __NR_syscalls		275
 
 #define __NR(n)	#n
 
Index: include/asm-ppc64/unistd.h
===================================================================
--- 589b4166efe6ab929ee25b20b2e89935efba504e/include/asm-ppc64/unistd.h  (mode:100644)
+++ uncommitted/include/asm-ppc64/unistd.h  (mode:100644)
@@ -283,8 +283,10 @@
 #define __NR_request_key	270
 #define __NR_keyctl		271
 #define __NR_waitid		272
+#define __NR_pselect6		273
+#define __NR_ppoll		274
 
-#define __NR_syscalls		273
+#define __NR_syscalls		275
 #ifdef __KERNEL__
 #define NR_syscalls	__NR_syscalls
 #endif
Index: include/linux/compat.h
===================================================================
--- 589b4166efe6ab929ee25b20b2e89935efba504e/include/linux/compat.h  (mode:100644)
+++ uncommitted/include/linux/compat.h  (mode:100644)
@@ -48,6 +48,7 @@
 	compat_sigset_word	sig[_COMPAT_NSIG_WORDS];
 } compat_sigset_t;
 
+extern void sigset_from_compat (sigset_t *set, compat_sigset_t *compat);
 extern int cp_compat_stat(struct kstat *, struct compat_stat __user *);
 extern int get_compat_timespec(struct timespec *, const struct compat_timespec __user *);
 extern int put_compat_timespec(const struct timespec *, struct compat_timespec __user *);

-- 
dwmw2


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-10 22:58 Add pselect, ppoll system calls David Woodhouse
@ 2005-06-12 22:48 ` Alan Cox
  2005-06-13  0:26   ` Linus Torvalds
  0 siblings, 1 reply; 45+ messages in thread
From: Alan Cox @ 2005-06-12 22:48 UTC (permalink / raw)
  To: David Woodhouse; +Cc: Linux Kernel Mailing List, akpm, torvalds, drepper

On Gwe, 2005-06-10 at 23:58, David Woodhouse wrote:
> The idea of pselect is that if one wants to wait for an event, either a
> signal  or  something on a file descriptor, an atomic test is needed to
> prevent race conditions. (Suppose the signal handler sets a global flag
> and  returns.  Then  a  test  of this global flag followed by a call of
> select() could hang indefinitely if the signal arrived just  after  the
> test but just before the call. On the other hand, pselect allows one to

See sleep(), going back to oh V7 unix. It has this avoided nicely in
user space using setjmp (nowdays using sigsetjmp).

If glibc has a race why not just fix glibc ?


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-12 22:48 ` Alan Cox
@ 2005-06-13  0:26   ` Linus Torvalds
  2005-06-13  1:16     ` jnf
  2005-06-13 11:15     ` Alan Cox
  0 siblings, 2 replies; 45+ messages in thread
From: Linus Torvalds @ 2005-06-13  0:26 UTC (permalink / raw)
  To: Alan Cox; +Cc: David Woodhouse, Linux Kernel Mailing List, akpm, drepper



On Sun, 12 Jun 2005, Alan Cox wrote:
> 
> If glibc has a race why not just fix glibc ?

.. because glibc tries to implement the "pselect()" interfaces that some 
other OS's implement.

Whether that is a good idea or not (it's not) is a different issue, but 
basically it means that apps don't use the longjmp trick exactly because 
they think pselect() works ok.

		Linus

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13  0:26   ` Linus Torvalds
@ 2005-06-13  1:16     ` jnf
  2005-06-13  3:26       ` Linus Torvalds
  2005-06-13 11:15     ` Alan Cox
  1 sibling, 1 reply; 45+ messages in thread
From: jnf @ 2005-06-13  1:16 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Alan Cox, David Woodhouse, Linux Kernel Mailing List, akpm, drepper

Hi.
I realize this is off subject to the mailing list, however its not really
off subject to the thread. What is the suggested method for dealing with
this? i.e. catching sigint which sets a global variable and then having
select() inside the loop, i.e.

while (boolean < 1 ) {
   [...]
   select(...);
   [...]
}

regards,

jnf


On Sun, 12 Jun 2005, Linus Torvalds wrote:

> Date: Sun, 12 Jun 2005 17:26:43 -0700 (PDT)
> From: Linus Torvalds <torvalds@osdl.org>
> To: Alan Cox <alan@lxorguk.ukuu.org.uk>
> Cc: David Woodhouse <dwmw2@infradead.org>,
>     Linux Kernel Mailing List <linux-kernel@vger.kernel.org>, akpm@osdl.org,
>     drepper@redhat.com
> Subject: Re: Add pselect, ppoll system calls.
>
>
>
> On Sun, 12 Jun 2005, Alan Cox wrote:
> >
> > If glibc has a race why not just fix glibc ?
>
> .. because glibc tries to implement the "pselect()" interfaces that some
> other OS's implement.
>
> Whether that is a good idea or not (it's not) is a different issue, but
> basically it means that apps don't use the longjmp trick exactly because
> they think pselect() works ok.
>
> 		Linus
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13  1:16     ` jnf
@ 2005-06-13  3:26       ` Linus Torvalds
  2005-06-13  6:22         ` Ulrich Drepper
  2005-06-13  7:23         ` Benjamin Herrenschmidt
  0 siblings, 2 replies; 45+ messages in thread
From: Linus Torvalds @ 2005-06-13  3:26 UTC (permalink / raw)
  To: jnf; +Cc: Alan Cox, David Woodhouse, Linux Kernel Mailing List, akpm, drepper



On Sun, 12 Jun 2005, jnf wrote:

> Hi. I realize this is off subject to the mailing list, however its not
> really off subject to the thread. What is the suggested method for
> dealing with this? i.e. catching sigint which sets a global variable and
> then having select() inside the loop, i.e.
> 
> while (boolean < 1 ) {
>    [...]
>    select(...);

Nope, that will have a race in between testing "boolean" and the select.

The simplest way is to do

	if (sigsetjmp(..)) {
		... handle signal ...
	}
	for (;;) {
		select(..)
	}

but a lot of people find the control flow very confusing, and I can't much 
blame them. 

One pretty simple alternative is to just make the timeout be a global, and 
have the signal handler clear it, guaranteeing that if we're just about to 
hit the select(), we'll exit immediately.

A third alternative that some people prefer (although I personally find it
to be the most complex of the bunch and it's also the least efficient, but
it works in threaded environments) is to have the signal handler write a
byte to a pipe, and have the select() mask contain that pipe as one of the
inputs so that the main loop sees the signal even as a pipe readability
test.

Anyway, that's three different approaches, all of which are portable and 
thus preferable to pselect() which is not.

			Linus

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13  3:26       ` Linus Torvalds
@ 2005-06-13  6:22         ` Ulrich Drepper
  2005-06-13  9:16           ` bert hubert
  2005-06-13  7:23         ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 45+ messages in thread
From: Ulrich Drepper @ 2005-06-13  6:22 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: jnf, Alan Cox, David Woodhouse, Linux Kernel Mailing List, akpm

[-- Attachment #1: Type: text/plain, Size: 373 bytes --]

Linus Torvalds wrote:
> Anyway, that's three different approaches, all of which are portable and 
> thus preferable to pselect() which is not.

pselect is portable, just not to current and old Linux systems.  It's in
POSIX for a while and the other Unixes have the interface.

-- 
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 251 bytes --]

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13  3:26       ` Linus Torvalds
  2005-06-13  6:22         ` Ulrich Drepper
@ 2005-06-13  7:23         ` Benjamin Herrenschmidt
  2005-06-13  7:29           ` David Woodhouse
  1 sibling, 1 reply; 45+ messages in thread
From: Benjamin Herrenschmidt @ 2005-06-13  7:23 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: jnf, Alan Cox, David Woodhouse, Linux Kernel Mailing List, akpm, drepper


> One pretty simple alternative is to just make the timeout be a global, and 
> have the signal handler clear it, guaranteeing that if we're just about to 
> hit the select(), we'll exit immediately.

That is still racy ... if the signal hits between loading that global to
to pass it to select and the actual syscall entry ... pretty narrow but
still there.



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13  7:23         ` Benjamin Herrenschmidt
@ 2005-06-13  7:29           ` David Woodhouse
  2005-06-13 22:56             ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 45+ messages in thread
From: David Woodhouse @ 2005-06-13  7:29 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Linus Torvalds, jnf, Alan Cox, Linux Kernel Mailing List, akpm, drepper

On Mon, 2005-06-13 at 17:23 +1000, Benjamin Herrenschmidt wrote:
> That is still racy ... if the signal hits between loading that global to
> to pass it to select and the actual syscall entry ... pretty narrow but
> still there.

We don't load it; we pass a pointer to select. It works, but it's hardly
elegant.

-- 
dwmw2


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13  6:22         ` Ulrich Drepper
@ 2005-06-13  9:16           ` bert hubert
  2005-06-13  9:41             ` David Woodhouse
  0 siblings, 1 reply; 45+ messages in thread
From: bert hubert @ 2005-06-13  9:16 UTC (permalink / raw)
  To: Ulrich Drepper
  Cc: Linus Torvalds, jnf, Alan Cox, David Woodhouse,
	Linux Kernel Mailing List, akpm

On Sun, Jun 12, 2005 at 11:22:56PM -0700, Ulrich Drepper wrote:
> pselect is portable, just not to current and old Linux systems.  It's in
> POSIX for a while and the other Unixes have the interface.

Indeed - it has been documented since the days of Stevens at least. All
unixes that matter have it.

>From the application side, I pretty much really want pselect to work. It is
an embarassment to have a broken implementation, if we really think pselect
sucks we should deprecate it in glibc.

So, please, consider merging the patches.. ppoll is something else, I never
heard about it, but pselect is widely known.

Thanks.

-- 
http://www.PowerDNS.com      Open source, database driven DNS Software 
http://netherlabs.nl              Open and Closed source services

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13  9:16           ` bert hubert
@ 2005-06-13  9:41             ` David Woodhouse
  2005-06-13 11:05               ` Christoph Hellwig
  0 siblings, 1 reply; 45+ messages in thread
From: David Woodhouse @ 2005-06-13  9:41 UTC (permalink / raw)
  To: bert hubert
  Cc: Ulrich Drepper, Linus Torvalds, jnf, Alan Cox,
	Linux Kernel Mailing List, akpm

On Mon, 2005-06-13 at 11:16 +0200, bert hubert wrote:
> So, please, consider merging the patches.. ppoll is something else, I never
> heard about it, but pselect is widely known.

It just seemed silly to implement pselect() without doing ppoll() at the
same time.

-- 
dwmw2


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13  9:41             ` David Woodhouse
@ 2005-06-13 11:05               ` Christoph Hellwig
  2005-06-13 11:14                 ` Jakub Jelinek
  2005-06-13 15:35                 ` Ulrich Drepper
  0 siblings, 2 replies; 45+ messages in thread
From: Christoph Hellwig @ 2005-06-13 11:05 UTC (permalink / raw)
  To: David Woodhouse
  Cc: bert hubert, Ulrich Drepper, Linus Torvalds, jnf, Alan Cox,
	Linux Kernel Mailing List, akpm

On Mon, Jun 13, 2005 at 10:41:41AM +0100, David Woodhouse wrote:
> On Mon, 2005-06-13 at 11:16 +0200, bert hubert wrote:
> > So, please, consider merging the patches.. ppoll is something else, I never
> > heard about it, but pselect is widely known.
> 
> It just seemed silly to implement pselect() without doing ppoll() at the
> same time.

Yes, it kinda makes sense.  Question to Uli: would you put ppoll() into
glibc as GNU extension?  If not adding the syscall is rather poinless
unfortunately.


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 11:05               ` Christoph Hellwig
@ 2005-06-13 11:14                 ` Jakub Jelinek
  2005-06-13 11:24                   ` David Woodhouse
  2005-06-13 15:35                 ` Ulrich Drepper
  1 sibling, 1 reply; 45+ messages in thread
From: Jakub Jelinek @ 2005-06-13 11:14 UTC (permalink / raw)
  To: Christoph Hellwig, David Woodhouse, bert hubert, Ulrich Drepper,
	Linus Torvalds, jnf, Alan Cox, Linux Kernel Mailing List, akpm

On Mon, Jun 13, 2005 at 12:05:56PM +0100, Christoph Hellwig wrote:
> On Mon, Jun 13, 2005 at 10:41:41AM +0100, David Woodhouse wrote:
> > On Mon, 2005-06-13 at 11:16 +0200, bert hubert wrote:
> > > So, please, consider merging the patches.. ppoll is something else, I never
> > > heard about it, but pselect is widely known.
> > 
> > It just seemed silly to implement pselect() without doing ppoll() at the
> > same time.
> 
> Yes, it kinda makes sense.  Question to Uli: would you put ppoll() into
> glibc as GNU extension?  If not adding the syscall is rather poinless
> unfortunately.

And if ppoll is added, the question is also what it's prototype should look like
(i.e. have the suboptimal int timeout argument as poll(2), where you can
only wait for less than a month or infinitely, or use const struct timespec *
like pselect, or struct timeval * like select).

	Jakub

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13  0:26   ` Linus Torvalds
  2005-06-13  1:16     ` jnf
@ 2005-06-13 11:15     ` Alan Cox
  2005-06-13 11:27       ` David Woodhouse
  2005-06-13 15:40       ` Ulrich Drepper
  1 sibling, 2 replies; 45+ messages in thread
From: Alan Cox @ 2005-06-13 11:15 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: David Woodhouse, Linux Kernel Mailing List, akpm, drepper

On Llu, 2005-06-13 at 01:26, Linus Torvalds wrote:
> On Sun, 12 Jun 2005, Alan Cox wrote:
> > 
> > If glibc has a race why not just fix glibc ?
> 
> .. because glibc tries to implement the "pselect()" interfaces that some 
> other OS's implement.
> 
> Whether that is a good idea or not (it's not) is a different issue, but 
> basically it means that apps don't use the longjmp trick exactly because 
> they think pselect() works ok.

Right but why can't glibc be fixed to use the longjmp trick internally.
That way pselect works properly for all applications as soon as the
glibc bug fix is
applied and the vendor/distro/whatever pushes a new glibc package ?

Alan


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 11:14                 ` Jakub Jelinek
@ 2005-06-13 11:24                   ` David Woodhouse
  2005-06-13 15:38                     ` Ulrich Drepper
  0 siblings, 1 reply; 45+ messages in thread
From: David Woodhouse @ 2005-06-13 11:24 UTC (permalink / raw)
  To: Jakub Jelinek
  Cc: Christoph Hellwig, bert hubert, Ulrich Drepper, Linus Torvalds,
	jnf, Alan Cox, Linux Kernel Mailing List, akpm

On Mon, 2005-06-13 at 07:14 -0400, Jakub Jelinek wrote:
> And if ppoll is added, the question is also what it's prototype should look like
> (i.e. have the suboptimal int timeout argument as poll(2), where you can
> only wait for less than a month or infinitely, or use const struct timespec *
> like pselect, or struct timeval * like select).

Eep -- I hadn't noticed that difference. Will update patch accordingly. 

The other documented difference (other than the signal mask bit) is that
pselect() is documented not to modify the timeout parameter. I'm not
sure whether I should preserve that difference, or not.

-- 
dwmw2


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 11:15     ` Alan Cox
@ 2005-06-13 11:27       ` David Woodhouse
  2005-06-13 15:40       ` Ulrich Drepper
  1 sibling, 0 replies; 45+ messages in thread
From: David Woodhouse @ 2005-06-13 11:27 UTC (permalink / raw)
  To: Alan Cox; +Cc: Linus Torvalds, Linux Kernel Mailing List, akpm, drepper

On Mon, 2005-06-13 at 12:15 +0100, Alan Cox wrote:
> Right but why can't glibc be fixed to use the longjmp trick
> internally.

Unless I misunderstand what is suggested, for glibc to do that
internally would be vile. It'd have to override the signal handler for
every signal which is unmasked by the new signal mask, right?

That kind of trick makes some sense to use in an _application_ if the
author isn't expecting to run on a POSIX system and just use pselect(),
but for glibc to hack it up internally would be horrid.

-- 
dwmw2


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 11:05               ` Christoph Hellwig
  2005-06-13 11:14                 ` Jakub Jelinek
@ 2005-06-13 15:35                 ` Ulrich Drepper
  1 sibling, 0 replies; 45+ messages in thread
From: Ulrich Drepper @ 2005-06-13 15:35 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: David Woodhouse, bert hubert, Linus Torvalds, jnf, Alan Cox,
	Linux Kernel Mailing List, akpm

[-- Attachment #1: Type: text/plain, Size: 475 bytes --]

Christoph Hellwig wrote:
> Yes, it kinda makes sense.  Question to Uli: would you put ppoll() into
> glibc as GNU extension?

Of course.  I would rather not add pselect() and deprecate select() than
not adding ppoll().  In fact, we just discussed a similar issue in the
POSIX base working group.  Due to the limitations select() might indeed
get the axe in a future revision.

-- 
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 251 bytes --]

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 11:24                   ` David Woodhouse
@ 2005-06-13 15:38                     ` Ulrich Drepper
  2005-06-13 16:02                       ` David Woodhouse
  2005-06-15 11:36                       ` David Woodhouse
  0 siblings, 2 replies; 45+ messages in thread
From: Ulrich Drepper @ 2005-06-13 15:38 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Jakub Jelinek, Linus Torvalds, Linux Kernel Mailing List, akpm

[-- Attachment #1: Type: text/plain, Size: 615 bytes --]

David Woodhouse wrote:

> Eep -- I hadn't noticed that difference. Will update patch accordingly. 

And change it to expect a 64bit value I hope...


> The other documented difference (other than the signal mask bit) is that
> pselect() is documented not to modify the timeout parameter. I'm not
> sure whether I should preserve that difference, or not.

As long as there is a configuration where the timeout value is not
modified, it doesn't matter.  That is the case for select() using a
personality switch.

-- 
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 251 bytes --]

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 11:15     ` Alan Cox
  2005-06-13 11:27       ` David Woodhouse
@ 2005-06-13 15:40       ` Ulrich Drepper
  1 sibling, 0 replies; 45+ messages in thread
From: Ulrich Drepper @ 2005-06-13 15:40 UTC (permalink / raw)
  To: Alan Cox; +Cc: Linus Torvalds, Linux Kernel Mailing List, akpm

[-- Attachment #1: Type: text/plain, Size: 339 bytes --]

Alan Cox wrote:
> Right but why can't glibc be fixed to use the longjmp trick internally.


How, Alan?  Remember, these are not the days of V7.  We have threads.
And we have signal handlers as a process-wide property, shared by all
threads.

-- 
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 251 bytes --]

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 15:38                     ` Ulrich Drepper
@ 2005-06-13 16:02                       ` David Woodhouse
  2005-06-13 16:10                         ` Ulrich Drepper
  2005-06-15 11:36                       ` David Woodhouse
  1 sibling, 1 reply; 45+ messages in thread
From: David Woodhouse @ 2005-06-13 16:02 UTC (permalink / raw)
  To: Ulrich Drepper
  Cc: Jakub Jelinek, Linus Torvalds, Linux Kernel Mailing List, akpm

On Mon, 2005-06-13 at 08:38 -0700, Ulrich Drepper wrote:
> And change it to expect a 64bit value I hope...

64-bit value for which? For seconds? Do we need to support timeouts of
longer than 4 milliard seconds? We can't currently come close to that
anyway -- we're limited to LONG_MAX / HZ seconds, and if the user asks
for more than that then it appears to be silently switched to an
infinite timeout.

> As long as there is a configuration where the timeout value is not
> modified, it doesn't matter.  That is the case for select() using a
> personality switch.

I think it would be best to behave likewise for pselect().

-- 
dwmw2


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 16:02                       ` David Woodhouse
@ 2005-06-13 16:10                         ` Ulrich Drepper
  2005-06-13 16:29                           ` David Woodhouse
  0 siblings, 1 reply; 45+ messages in thread
From: Ulrich Drepper @ 2005-06-13 16:10 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Jakub Jelinek, Linus Torvalds, Linux Kernel Mailing List, akpm

[-- Attachment #1: Type: text/plain, Size: 571 bytes --]

David Woodhouse wrote:
> 64-bit value for which? For seconds?

poll()'s timeout value is measrued in milliseconds.  Using a 32bit
value, as implied by using 'int' for the type, limits the mximum timeout
to be 2^31-1 milliseconds, which means about 24 days.  Believe it or
not, people are complaining about this.  Changing the timeout to a 64
bit millisecond timeout would lift the limitation from the API's POV.  I
don't know what limitations exist in the kernel itself.

-- 
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 251 bytes --]

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 16:10                         ` Ulrich Drepper
@ 2005-06-13 16:29                           ` David Woodhouse
  2005-06-13 16:31                             ` Ulrich Drepper
  2005-06-13 16:36                             ` Jakub Jelinek
  0 siblings, 2 replies; 45+ messages in thread
From: David Woodhouse @ 2005-06-13 16:29 UTC (permalink / raw)
  To: Ulrich Drepper
  Cc: Jakub Jelinek, Linus Torvalds, Linux Kernel Mailing List, akpm

On Mon, 2005-06-13 at 09:10 -0700, Ulrich Drepper wrote:
> poll()'s timeout value is measrued in milliseconds.  Using a 32bit
> value, as implied by using 'int' for the type, limits the mximum
> timeout to be 2^31-1 milliseconds, which means about 24 days.

Ah, OK. I thought you were talking about the timespec in pselect(),
because that's what you quoted. 

Yes, we should make the time for ppoll() a 64-bit value, so you can
request a time period longer than 24 days. Shall we also switch to
microseconds?

>   Believe it or not, people are complaining about this.  Changing the
> timeout to a 64bit millisecond timeout would lift the limitation from
> the API's  POV.  I don't know what limitations exist in the kernel
> itself.

Indeed. The ABI will be set in stone, but we have some leeway to fix the
implementation details of the kernel's limitations -- such as the fact
that on a 32-bit box with HZ == 1000, any requested timeout above
MAX_LONG milliseconds (24 days) will actually end up being infinite.

-- 
dwmw2


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 16:29                           ` David Woodhouse
@ 2005-06-13 16:31                             ` Ulrich Drepper
  2005-06-13 21:58                               ` David Woodhouse
  2005-06-13 16:36                             ` Jakub Jelinek
  1 sibling, 1 reply; 45+ messages in thread
From: Ulrich Drepper @ 2005-06-13 16:31 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Jakub Jelinek, Linus Torvalds, Linux Kernel Mailing List, akpm

[-- Attachment #1: Type: text/plain, Size: 456 bytes --]

David Woodhouse wrote:
> Yes, we should make the time for ppoll() a 64-bit value, so you can
> request a time period longer than 24 days. Shall we also switch to
> microseconds?

I don't see the need for higher precision in this API and a change is
confusing.  There is nanosleep() for people who only want a delay and
that can be done with high precision.

-- 
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 251 bytes --]

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 16:29                           ` David Woodhouse
  2005-06-13 16:31                             ` Ulrich Drepper
@ 2005-06-13 16:36                             ` Jakub Jelinek
  1 sibling, 0 replies; 45+ messages in thread
From: Jakub Jelinek @ 2005-06-13 16:36 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Ulrich Drepper, Linus Torvalds, Linux Kernel Mailing List, akpm

On Mon, Jun 13, 2005 at 05:29:37PM +0100, David Woodhouse wrote:
> On Mon, 2005-06-13 at 09:10 -0700, Ulrich Drepper wrote:
> > poll()'s timeout value is measrued in milliseconds.  Using a 32bit
> > value, as implied by using 'int' for the type, limits the mximum
> > timeout to be 2^31-1 milliseconds, which means about 24 days.
> 
> Ah, OK. I thought you were talking about the timespec in pselect(),
> because that's what you quoted. 
> 
> Yes, we should make the time for ppoll() a 64-bit value, so you can
> request a time period longer than 24 days. Shall we also switch to
> microseconds?

I think passing const struct timeval * or const struct timespec *
(depending if you want micro or nanoseconds) is better and what
other functions use for timeouts, then passing int64_t.

	Jakub

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 16:31                             ` Ulrich Drepper
@ 2005-06-13 21:58                               ` David Woodhouse
  2005-06-13 22:01                                 ` Ulrich Drepper
  0 siblings, 1 reply; 45+ messages in thread
From: David Woodhouse @ 2005-06-13 21:58 UTC (permalink / raw)
  To: Ulrich Drepper
  Cc: Jakub Jelinek, Linus Torvalds, Linux Kernel Mailing List, akpm

On Mon, 2005-06-13 at 09:31 -0700, Ulrich Drepper wrote:
> I don't see the need for higher precision in this API and a change is
> confusing.  There is nanosleep() for people who only want a delay and
> that can be done with high precision.

If that's the case, might one enquire as to why pselect() has a struct
timespec instead of a struct timeval?

Or would the answer just depress me?
> 
-- 
dwmw2



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 21:58                               ` David Woodhouse
@ 2005-06-13 22:01                                 ` Ulrich Drepper
  0 siblings, 0 replies; 45+ messages in thread
From: Ulrich Drepper @ 2005-06-13 22:01 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Jakub Jelinek, Linus Torvalds, Linux Kernel Mailing List, akpm

[-- Attachment #1: Type: text/plain, Size: 335 bytes --]

David Woodhouse wrote:
> If that's the case, might one enquire as to why pselect() has a struct
> timespec instead of a struct timeval?

General alignment.  We are not adding any new interfaces using timeval
to the specification anymore.

-- 
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 251 bytes --]

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13  7:29           ` David Woodhouse
@ 2005-06-13 22:56             ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 45+ messages in thread
From: Benjamin Herrenschmidt @ 2005-06-13 22:56 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Linus Torvalds, jnf, Alan Cox, Linux Kernel Mailing List, akpm, drepper

On Mon, 2005-06-13 at 08:29 +0100, David Woodhouse wrote:
> On Mon, 2005-06-13 at 17:23 +1000, Benjamin Herrenschmidt wrote:
> > That is still racy ... if the signal hits between loading that global to
> > to pass it to select and the actual syscall entry ... pretty narrow but
> > still there.
> 
> We don't load it; we pass a pointer to select. It works, but it's hardly
> elegant.

Argh true, blame my own wrapper to select that takes an ulong ..

Who said POSIX was a crap API ? :)

Ben.



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 15:38                     ` Ulrich Drepper
  2005-06-13 16:02                       ` David Woodhouse
@ 2005-06-15 11:36                       ` David Woodhouse
  2005-08-05 10:42                         ` pselect() modifying timeout Michael Kerrisk
  2005-08-05 11:58                         ` Add pselect, ppoll system calls Michael Kerrisk
  1 sibling, 2 replies; 45+ messages in thread
From: David Woodhouse @ 2005-06-15 11:36 UTC (permalink / raw)
  To: Ulrich Drepper
  Cc: Jakub Jelinek, Linus Torvalds, Linux Kernel Mailing List, akpm

On Mon, 2005-06-13 at 08:38 -0700, Ulrich Drepper wrote:
> > Eep -- I hadn't noticed that difference. Will update patch
> accordingly. 
> 
> And change [the poll timeout] to expect a 64bit value I hope...

I believe that 64-bit int types in syscalls are a pain on some
architectures because of restrictions on precisely which register pairs
may be used. I think I'd rather just use a struct timespec, which also
makes it consistent with pselect().

> > The other documented difference (other than the signal mask bit) is that
> > pselect() is documented not to modify the timeout parameter. I'm not
> > sure whether I should preserve that difference, or not.
> 
> As long as there is a configuration where the timeout value is not
> modified, it doesn't matter.  That is the case for select() using a
> personality switch.

I've made pselect() consistent with select() by using the same
personality switch to control its behaviour.

I've also fixed select() so that timeouts of greater than LONG_MAX
jiffies are implemented accurately, instead of being infinite.

Do this look OK?

Index: arch/i386/kernel/syscall_table.S
===================================================================
--- 719924701adccf053de08548a2195cba0a6198e0/arch/i386/kernel/syscall_table.S  (mode:100644)
+++ uncommitted/arch/i386/kernel/syscall_table.S  (mode:100644)
@@ -289,3 +289,5 @@
 	.long sys_add_key
 	.long sys_request_key
 	.long sys_keyctl
+	.long sys_pselect6
+	.long sys_ppoll			/* 290 */
Index: arch/ppc/kernel/misc.S
===================================================================
--- 719924701adccf053de08548a2195cba0a6198e0/arch/ppc/kernel/misc.S  (mode:100644)
+++ uncommitted/arch/ppc/kernel/misc.S  (mode:100644)
@@ -1441,3 +1441,5 @@
 	.long sys_request_key		/* 270 */
 	.long sys_keyctl
 	.long sys_waitid
+	.long sys_pselect6
+	.long sys_ppoll
Index: arch/ppc64/kernel/misc.S
===================================================================
--- 719924701adccf053de08548a2195cba0a6198e0/arch/ppc64/kernel/misc.S  (mode:100644)
+++ uncommitted/arch/ppc64/kernel/misc.S  (mode:100644)
@@ -953,10 +953,12 @@
 	.llong .compat_sys_mq_getsetattr
 	.llong .sys_ni_syscall		/* 268 reserved for sys_kexec_load */
 	.llong .sys32_add_key
-	.llong .sys32_request_key
+	.llong .sys32_request_key	/* 270 */
 	.llong .compat_sys_keyctl
 	.llong .compat_sys_waitid
-
+	.llong .compat_sys_pselect6
+	.llong .compat_sys_ppoll
+	
 	.balign 8
 _GLOBAL(sys_call_table)
 	.llong .sys_restart_syscall	/* 0 */
@@ -1232,3 +1234,5 @@
 	.llong .sys_request_key		/* 270 */
 	.llong .sys_keyctl
 	.llong .sys_waitid
+	.llong .sys_pselect6
+	.llong .sys_ppoll
Index: arch/ppc64/kernel/signal32.c
===================================================================
--- 719924701adccf053de08548a2195cba0a6198e0/arch/ppc64/kernel/signal32.c  (mode:100644)
+++ uncommitted/arch/ppc64/kernel/signal32.c  (mode:100644)
@@ -112,17 +112,6 @@
 	}
 }
 
-static inline void sigset_from_compat(sigset_t *set, compat_sigset_t *compat)
-{
-	switch (_NSIG_WORDS) {
-	case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32);
-	case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32);
-	case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32);
-	case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32);
-	}
-}
-
-
 /*
  * Save the current user registers on the user stack.
  * We only save the altivec registers if the process has used
Index: fs/compat.c
===================================================================
--- 719924701adccf053de08548a2195cba0a6198e0/fs/compat.c  (mode:100644)
+++ uncommitted/fs/compat.c  (mode:100644)
@@ -1687,35 +1687,13 @@
 #define MAX_SELECT_SECONDS \
 	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
 
-asmlinkage long
-compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp,
-		compat_ulong_t __user *exp, struct compat_timeval __user *tvp)
+int compat_core_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp,
+			   compat_ulong_t __user *exp, long *timeout)
 {
 	fd_set_bits fds;
 	char *bits;
-	long timeout;
 	int size, max_fdset, ret = -EINVAL;
 
-	timeout = MAX_SCHEDULE_TIMEOUT;
-	if (tvp) {
-		time_t sec, usec;
-
-		if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp))
-		    || __get_user(sec, &tvp->tv_sec)
-		    || __get_user(usec, &tvp->tv_usec)) {
-			ret = -EFAULT;
-			goto out_nofds;
-		}
-
-		if (sec < 0 || usec < 0)
-			goto out_nofds;
-
-		if ((unsigned long) sec < MAX_SELECT_SECONDS) {
-			timeout = ROUND_UP(usec, 1000000/HZ);
-			timeout += sec * (unsigned long) HZ;
-		}
-	}
-
 	if (n < 0)
 		goto out_nofds;
 
@@ -1749,19 +1727,7 @@
 	zero_fd_set(n, fds.res_out);
 	zero_fd_set(n, fds.res_ex);
 
-	ret = do_select(n, &fds, &timeout);
-
-	if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
-		time_t sec = 0, usec = 0;
-		if (timeout) {
-			sec = timeout / HZ;
-			usec = timeout % HZ;
-			usec *= (1000000/HZ);
-		}
-		if (put_user(sec, &tvp->tv_sec) ||
-		    put_user(usec, &tvp->tv_usec))
-			ret = -EFAULT;
-	}
+	ret = do_select(n, &fds, timeout);
 
 	if (ret < 0)
 		goto out;
@@ -1782,6 +1748,173 @@
 	return ret;
 }
 
+asmlinkage long
+compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp,
+		compat_ulong_t __user *exp, struct compat_timeval __user *tvp)
+{
+	long timeout = MAX_SCHEDULE_TIMEOUT;
+	struct compat_timeval tv;
+	int ret;
+
+	if (tvp) {
+		if (copy_from_user(&tv, tvp, sizeof(tv)))
+			return -EFAULT;
+
+		if (tv.tv_sec < 0 || tv.tv_usec < 0)
+			return -EINVAL;
+	}
+	
+	do {
+		if (tvp) {
+			if ((unsigned long) tv.tv_sec < MAX_SELECT_SECONDS) {
+				timeout = ROUND_UP(tv.tv_usec, 1000000/HZ);
+				timeout += tv.tv_sec * (unsigned long) HZ;
+				tv.tv_sec = 0;
+				tv.tv_usec = 0;
+			} else {
+				tv.tv_sec -= MAX_SELECT_SECONDS;
+				timeout = MAX_SELECT_SECONDS * HZ;
+			}
+		}
+		
+		ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
+
+	} while (!ret && !timeout && tvp && (tv.tv_sec || tv.tv_usec));
+
+	if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
+		tv.tv_sec += timeout / HZ;
+		tv.tv_usec += (timeout % HZ) * 1000000/HZ;
+		if (tv.tv_usec >= 1000000) {
+			tv.tv_sec++;
+			tv.tv_usec -= 1000000;
+		}
+		(void)copy_to_user(tvp, &tv, sizeof(tv));
+	}
+
+	return ret;
+}
+asmlinkage long
+compat_sys_pselect7(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp,
+		    compat_ulong_t __user *exp, struct compat_timespec __user *tsp,
+		    compat_sigset_t __user *sigmask, compat_size_t sigsetsize)
+{
+	compat_sigset_t s32;
+	sigset_t ksigmask, sigsaved;
+	long timeout = MAX_SCHEDULE_TIMEOUT;
+	struct compat_timespec ts;
+	int ret;
+
+	if (tsp) {
+		if (copy_from_user(&ts, tsp, sizeof(ts)))
+			return -EFAULT;
+
+		if (ts.tv_sec < 0 || ts.tv_nsec < 0)
+			return -EINVAL;
+	}
+
+	if (sigmask) {
+		if (sigsetsize != sizeof(compat_sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&s32, sigmask, sizeof(s32)))
+			return -EFAULT;
+		sigset_from_compat(&ksigmask, &s32);
+
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	do {
+		if (tsp) {
+			if ((unsigned long) ts.tv_sec < MAX_SELECT_SECONDS) {
+				timeout = ROUND_UP(ts.tv_nsec, 1000000000/HZ);
+				timeout += ts.tv_sec * (unsigned long) HZ;
+				ts.tv_sec = 0;
+				ts.tv_nsec = 0;
+			} else {
+				ts.tv_sec -= MAX_SELECT_SECONDS;
+				timeout = MAX_SELECT_SECONDS * HZ;
+			}
+		}
+
+		ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
+
+	} while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
+
+	if (tsp && !(current->personality & STICKY_TIMEOUTS)) {
+		ts.tv_sec += timeout / HZ;
+		ts.tv_nsec += (timeout % HZ) * (1000000000/HZ);
+		if (ts.tv_nsec >= 1000000000) {
+			ts.tv_sec++;
+			ts.tv_nsec -= 1000000000;
+		}
+		(void)copy_to_user(tsp, &ts, sizeof(ts));
+	}
+
+	if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return ret;
+}
+
+asmlinkage long
+compat_sys_pselect6(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp,
+		   compat_ulong_t __user *exp, struct compat_timespec __user *tsp,
+		    void __user *sig)
+{
+	compat_size_t sigsetsize = 0;
+	compat_uptr_t up = 0;
+
+	if (sig) {
+		if (!access_ok(VERIFY_READ, sig, sizeof(compat_uptr_t) + sizeof(compat_size_t))
+		    || __get_user(up, (compat_uptr_t __user *)sig)
+		    || __get_user(sigsetsize, (compat_size_t __user *)(sig+sizeof(up))))
+			return -EFAULT;
+	}
+	return compat_sys_pselect7(n, inp, outp, exp, tsp, compat_ptr(up), sigsetsize);
+}
+
+#define MAX_INT64_SECONDS (((int64_t)(~((uint64_t)0)>>1)/HZ)-1)
+
+asmlinkage long
+compat_sys_ppoll(struct pollfd __user * ufds, unsigned int nfds, struct compat_timespec __user *tsp, 
+		 const compat_sigset_t __user *sigmask, compat_size_t sigsetsize)
+{
+	compat_sigset_t s32;
+	sigset_t ksigmask, sigsaved;
+	struct compat_timespec ts;
+	int64_t timeout = -1;
+	int ret;
+
+	if (tsp) {
+		if (copy_from_user(&ts, tsp, sizeof(ts)))
+			return -EFAULT;
+
+		if (ts.tv_sec < MAX_INT64_SECONDS) {
+			timeout = ROUND_UP(ts.tv_sec, 1000000000/HZ);
+			timeout += ts.tv_sec * HZ;
+		} else
+			timeout = MAX_SCHEDULE_TIMEOUT;
+	}
+
+	if (sigmask) {
+		if (sigsetsize |= sizeof(compat_sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&s32, sigmask, sizeof(s32)))
+			return -EFAULT;
+		sigset_from_compat(&ksigmask, &s32);
+
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	ret = do_sys_poll(ufds, nfds, timeout);
+
+	if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return ret;	
+}
+
 #if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
 /* Stuff for NFS server syscalls... */
 struct compat_nfsctl_svc {
Index: fs/select.c
===================================================================
--- 719924701adccf053de08548a2195cba0a6198e0/fs/select.c  (mode:100644)
+++ uncommitted/fs/select.c  (mode:100644)
@@ -292,35 +292,13 @@
 #define MAX_SELECT_SECONDS \
 	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
 
-asmlinkage long
-sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp)
+static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
+			   fd_set __user *exp, long *timeout)
 {
 	fd_set_bits fds;
 	char *bits;
-	long timeout;
 	int ret, size, max_fdset;
 
-	timeout = MAX_SCHEDULE_TIMEOUT;
-	if (tvp) {
-		time_t sec, usec;
-
-		if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp))
-		    || __get_user(sec, &tvp->tv_sec)
-		    || __get_user(usec, &tvp->tv_usec)) {
-			ret = -EFAULT;
-			goto out_nofds;
-		}
-
-		ret = -EINVAL;
-		if (sec < 0 || usec < 0)
-			goto out_nofds;
-
-		if ((unsigned long) sec < MAX_SELECT_SECONDS) {
-			timeout = ROUND_UP(usec, 1000000/HZ);
-			timeout += sec * (unsigned long) HZ;
-		}
-	}
-
 	ret = -EINVAL;
 	if (n < 0)
 		goto out_nofds;
@@ -355,18 +333,7 @@
 	zero_fd_set(n, fds.res_out);
 	zero_fd_set(n, fds.res_ex);
 
-	ret = do_select(n, &fds, &timeout);
-
-	if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
-		time_t sec = 0, usec = 0;
-		if (timeout) {
-			sec = timeout / HZ;
-			usec = timeout % HZ;
-			usec *= (1000000/HZ);
-		}
-		put_user(sec, &tvp->tv_sec);
-		put_user(usec, &tvp->tv_usec);
-	}
+	ret = do_select(n, &fds, timeout);
 
 	if (ret < 0)
 		goto out;
@@ -388,6 +355,133 @@
 	return ret;
 }
 
+asmlinkage long
+sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp)
+{
+	long timeout = MAX_SCHEDULE_TIMEOUT;
+	struct timeval tv;
+	int ret;
+
+	if (tvp) {
+		if (copy_from_user(&tv, tvp, sizeof(tv)))
+			return -EFAULT;
+
+		if (tv.tv_sec < 0 || tv.tv_usec < 0)
+			return -EINVAL;
+	}
+
+	do {
+		if (tvp) {
+			if ((unsigned long) tv.tv_sec < MAX_SELECT_SECONDS) {
+				timeout = ROUND_UP(tv.tv_usec, 1000000/HZ);
+				timeout += tv.tv_sec * (unsigned long) HZ;
+				tv.tv_sec = 0;
+				tv.tv_usec = 0;
+			} else {
+				tv.tv_sec -= MAX_SELECT_SECONDS;
+				timeout = MAX_SELECT_SECONDS * HZ;
+			}
+		}
+
+		ret = core_sys_select(n, inp, outp, exp, &timeout);
+
+	} while (!ret && !timeout && tvp && (tv.tv_sec || tv.tv_usec));
+
+	if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
+		tv.tv_sec += timeout / HZ;
+		tv.tv_usec += (timeout % HZ) * 1000000/HZ;
+		if (tv.tv_usec >= 1000000) {
+			tv.tv_sec++;
+			tv.tv_usec -= 1000000;
+		}
+		(void)copy_to_user(tvp, &tv, sizeof(tv));
+	}
+
+	return ret;
+}
+
+asmlinkage long
+sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, 
+	     struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize)
+{
+	long timeout = MAX_SCHEDULE_TIMEOUT;
+	sigset_t ksigmask, sigsaved;
+	struct timespec ts;
+	int ret;
+
+	if (tsp) {
+		if (copy_from_user(&ts, tsp, sizeof(ts)))
+			return -EFAULT;
+
+		if (ts.tv_sec < 0 || ts.tv_nsec < 0)
+			return -EINVAL;
+	}
+
+	if (sigmask) {
+		/* XXX: Don't preclude handling different sized sigset_t's.  */
+		if (sigsetsize != sizeof(sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+			return -EFAULT;
+
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	do {
+		if (tsp) {
+			if ((unsigned long) ts.tv_sec < MAX_SELECT_SECONDS) {
+				timeout = ROUND_UP(ts.tv_nsec, 1000000000/HZ);
+				timeout += ts.tv_sec * (unsigned long) HZ;
+				ts.tv_sec = 0;
+				ts.tv_nsec = 0;
+			} else {
+				ts.tv_sec -= MAX_SELECT_SECONDS;
+				timeout = MAX_SELECT_SECONDS * HZ;
+			}
+		}
+
+		ret = core_sys_select(n, inp, outp, exp, &timeout);
+
+	} while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
+
+	if (tsp && !(current->personality & STICKY_TIMEOUTS)) {
+		ts.tv_sec += timeout / HZ;
+		ts.tv_nsec += (timeout % HZ) * (1000000000/HZ);
+		if (ts.tv_nsec >= 1000000000) {
+			ts.tv_sec++;
+			ts.tv_nsec -= 1000000000;
+		}
+		(void)copy_to_user(tsp, &ts, sizeof(ts));
+	}
+
+	if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	return ret;
+}
+
+/* Most architectures can't handle 7-argument syscalls. So we provide
+   a 6-argument version where the sixth argument is a pointer to a
+   structure which has a pointer to the sigset_t itself followed by
+   a size_t containing the sigset size. */
+asmlinkage long
+sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
+	     struct timespec __user *tsp, void __user *sig)
+{
+	size_t sigsetsize = 0;
+	sigset_t __user *up = NULL;
+
+	if (sig) {
+		if (!access_ok(VERIFY_READ, sig, sizeof(void *) + sizeof(size_t))
+		    || __get_user(up, (sigset_t * __user *)sig)
+		    || __get_user(sigsetsize, (size_t * __user)(sig+sizeof(void *))))
+			return -EFAULT;
+	}
+	
+	return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize);
+}
+
+
 struct poll_list {
 	struct poll_list *next;
 	int len;
@@ -457,7 +551,7 @@
 	return count;
 }
 
-asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long timeout)
+int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, int64_t timeout)
 {
 	struct poll_wqueues table;
  	int fdcount, err;
@@ -469,14 +563,6 @@
 	if (nfds > current->files->max_fdset && nfds > OPEN_MAX)
 		return -EINVAL;
 
-	if (timeout) {
-		/* Careful about overflow in the intermediate values */
-		if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
-			timeout = (unsigned long)(timeout*HZ+999)/1000+1;
-		else /* Negative or overflow */
-			timeout = MAX_SCHEDULE_TIMEOUT;
-	}
-
 	poll_initwait(&table);
 
 	head = NULL;
@@ -506,7 +592,25 @@
 		}
 		i -= pp->len;
 	}
-	fdcount = do_poll(nfds, head, &table, timeout);
+
+	do {
+		long timeo;
+		
+		if (unlikely(timeout >= (int64_t)MAX_SCHEDULE_TIMEOUT - 1)) {
+			timeo = MAX_SCHEDULE_TIMEOUT - 1;
+			timeout -= timeo;
+		} else {
+			if (timeout < 0)
+				timeo = MAX_SCHEDULE_TIMEOUT;
+			else
+				timeo = timeout;
+
+			timeout = 0;
+		}
+
+	    fdcount = do_poll(nfds, head, &table, timeo);
+
+	} while (!fdcount && !signal_pending(current) && timeout);
 
 	/* OK, now copy the revents fields back to user space. */
 	walk = head;
@@ -534,3 +638,56 @@
 	poll_freewait(&table);
 	return err;
 }
+
+asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long timeout)
+{
+	if (timeout) {
+		/* Careful about overflow in the intermediate values */
+		if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
+			timeout = (unsigned long)(timeout*HZ+999)/1000+1;
+		else /* Negative or overflow */
+			timeout = -1;
+	}
+
+	return do_sys_poll(ufds, nfds, (int64_t)timeout);
+}
+
+#define MAX_INT64_SECONDS (((int64_t)(~((uint64_t)0)>>1)/HZ)-1)
+
+asmlinkage long sys_ppoll(struct pollfd __user * ufds, unsigned int nfds, struct timespec __user *tsp,
+			  const sigset_t __user *sigmask, size_t sigsetsize)
+{
+	sigset_t ksigmask, sigsaved;
+	struct timespec ts;
+	int64_t timeout = -1;
+	int ret;
+
+	if (tsp) {
+		if (copy_from_user(&ts, tsp, sizeof(ts)))
+			return -EFAULT;
+
+		if (ts.tv_sec < MAX_INT64_SECONDS) {
+			timeout = ROUND_UP(ts.tv_sec, 1000000000/HZ);
+			timeout += ts.tv_sec * HZ;
+		} else
+			timeout = MAX_SCHEDULE_TIMEOUT;
+	}
+
+	if (sigmask) {
+		/* XXX: Don't preclude handling different sized sigset_t's.  */
+		if (sigsetsize != sizeof(sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+			return -EFAULT;
+
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	ret = do_sys_poll(ufds, nfds, timeout);
+
+	if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return ret;
+}
Index: include/asm-i386/unistd.h
===================================================================
--- 719924701adccf053de08548a2195cba0a6198e0/include/asm-i386/unistd.h  (mode:100644)
+++ uncommitted/include/asm-i386/unistd.h  (mode:100644)
@@ -294,8 +294,10 @@
 #define __NR_add_key		286
 #define __NR_request_key	287
 #define __NR_keyctl		288
+#define __NR_pselect6		289
+#define __NR_ppoll		290
 
-#define NR_syscalls 289
+#define NR_syscalls 291
 
 /*
  * user-visible error numbers are in the range -1 - -128: see
Index: include/asm-ppc/unistd.h
===================================================================
--- 719924701adccf053de08548a2195cba0a6198e0/include/asm-ppc/unistd.h  (mode:100644)
+++ uncommitted/include/asm-ppc/unistd.h  (mode:100644)
@@ -277,8 +277,10 @@
 #define __NR_request_key	270
 #define __NR_keyctl		271
 #define __NR_waitid		272
+#define __NR_pselect6		273
+#define __NR_ppoll		274
 
-#define __NR_syscalls		273
+#define __NR_syscalls		275
 
 #define __NR(n)	#n
 
Index: include/asm-ppc64/unistd.h
===================================================================
--- 719924701adccf053de08548a2195cba0a6198e0/include/asm-ppc64/unistd.h  (mode:100644)
+++ uncommitted/include/asm-ppc64/unistd.h  (mode:100644)
@@ -283,8 +283,10 @@
 #define __NR_request_key	270
 #define __NR_keyctl		271
 #define __NR_waitid		272
+#define __NR_pselect6		273
+#define __NR_ppoll		274
 
-#define __NR_syscalls		273
+#define __NR_syscalls		275
 #ifdef __KERNEL__
 #define NR_syscalls	__NR_syscalls
 #endif
Index: include/linux/compat.h
===================================================================
--- 719924701adccf053de08548a2195cba0a6198e0/include/linux/compat.h  (mode:100644)
+++ uncommitted/include/linux/compat.h  (mode:100644)
@@ -48,6 +48,7 @@
 	compat_sigset_word	sig[_COMPAT_NSIG_WORDS];
 } compat_sigset_t;
 
+extern void sigset_from_compat (sigset_t *set, compat_sigset_t *compat);
 extern int cp_compat_stat(struct kstat *, struct compat_stat __user *);
 extern int get_compat_timespec(struct timespec *, const struct compat_timespec __user *);
 extern int put_compat_timespec(const struct timespec *, struct compat_timespec __user *);
Index: include/linux/poll.h
===================================================================
--- 719924701adccf053de08548a2195cba0a6198e0/include/linux/poll.h  (mode:100644)
+++ uncommitted/include/linux/poll.h  (mode:100644)
@@ -93,6 +93,7 @@
 }
 
 extern int do_select(int n, fd_set_bits *fds, long *timeout);
+extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, int64_t timeout);
 
 #endif /* KERNEL */
 


-- 
dwmw2


^ permalink raw reply	[flat|nested] 45+ messages in thread

* pselect() modifying timeout
  2005-06-15 11:36                       ` David Woodhouse
@ 2005-08-05 10:42                         ` Michael Kerrisk
  2005-08-05 14:50                           ` Ulrich Drepper
  2005-08-08 11:10                           ` Alan Cox
  2005-08-05 11:58                         ` Add pselect, ppoll system calls Michael Kerrisk
  1 sibling, 2 replies; 45+ messages in thread
From: Michael Kerrisk @ 2005-08-05 10:42 UTC (permalink / raw)
  To: David Woodhouse
  Cc: drepper, jakub, linux-kernel, bert.hubert, michael.kerrisk, akpm

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="us-ascii", Size: 1774 bytes --]

Hello David,

By the way, looking at the comments to the last version of
the pselect()/ppoll()patch, I see that the treatment of 
the timeout argument is made dependent on the personality.  

http://marc.theaimsgroup.com/?l=linux-kernel&m=111883591220436&w=2

I'm not sure that this is a good idea; my reasons as follows:

1. POSIX made the behaviour of pselect() explicit -- the 
   timeout must not be modified.  The idea was to avoid the 
   vagueness of the select() specification; it had to be vague 
   because of existing implementations. By contrast, there were 
   no pre-existing implementations when pselect() was specified.  
   (By the way, although one or two posts in the earlier thread 
   implied that pselect() has long/widely been present on 
   some systems, this is almost certainly not true.  The only 
   systems where I believe it is currently implemented are two that 
   were recently Unix 03 certified: Solaris 10 and AIX (5.3?).  I 
   know from doing quite a bit of checking that it is not present 
   as a kernel implementation on most (all?) other systems (even
   though it was already described by POSIX.1g and Richard 
   Stevens 7 years ago))  

   I haven't tested Solaris 10 and AIX, but I think one can be 
   reasonably sure that they would conform to the letter of 
   POSIX law.  Lacking any strong reason to the contrary, 
   Linux should (IMO) too (why gratuitously introduce 
   differences across implementations?).

2. The existing (non-atomic) glibc pselect() implementation 
   does not change the timeout argument.

Please consider making Linux pselect() conform to POSIX on this 
point.

Cheers,

Michael

-- 
5 GB Mailbox, 50 FreeSMS http://www.gmx.net/de/go/promail
+++ GMX - die erste Adresse für Mail, Message, More +++

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-15 11:36                       ` David Woodhouse
  2005-08-05 10:42                         ` pselect() modifying timeout Michael Kerrisk
@ 2005-08-05 11:58                         ` Michael Kerrisk
  2005-08-05 12:49                           ` Michael Kerrisk
  1 sibling, 1 reply; 45+ messages in thread
From: Michael Kerrisk @ 2005-08-05 11:58 UTC (permalink / raw)
  To: David Woodhouse; +Cc: drepper, jakub, torvalds, linux-kernel, akpm

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="us-ascii", Size: 5875 bytes --]

> From: David Woodhouse <dwmw2@infradead.org>
> Subject: Re: Add pselect, ppoll system calls.
> Date: Wed, 15 Jun 2005 12:36:54 +0100
> 
> On Mon, 2005-06-13 at 08:38 -0700, Ulrich Drepper wrote:
> > > Eep -- I hadn't noticed that difference. Will update patch
> > accordingly. 
> > 
> > And change [the poll timeout] to expect a 64bit value I hope...
> 
> I believe that 64-bit int types in syscalls are a pain on some
> architectures because of restrictions on precisely which register pairs
> may be used. I think I'd rather just use a struct timespec, which also
> makes it consistent with pselect().
> 
> > > The other documented difference (other than the signal mask bit) is
> > > that
> > > pselect() is documented not to modify the timeout parameter. I'm not
> > > sure whether I should preserve that difference, or not.
> > 
> > As long as there is a configuration where the timeout value is not
> > modified, it doesn't matter.  That is the case for select() using a
> > personality switch.
> 
> I've made pselect() consistent with select() by using the same
> personality switch to control its behaviour.
> 
> I've also fixed select() so that timeouts of greater than LONG_MAX
> jiffies are implemented accurately, instead of being infinite.

Hi David,

I applied the 15 Jun version of your patch agains 2.6.12 and 
tried a test program.  I see some behaviour that puzzles me.
When I run the program below, and type control-C, the program
tells me that the SIGINT hander was *not* called:

./t_pselect -
Making syscall(SYS_pselect6) call
[Type ^C]
pselect: Unknown error 514           <-- ERESTNOHAND
ready = -1
stdin IS NOT readable
signal handler WAS NOT called

I'm not sure whether this is possibly a mistake in the way 
I've constructed my test program (I don't think so, but I'm 
not !00% confident), or some bug in the implementation
(I did try making a syscall(SYS__newselect), and that *did* 
show the signal handler being called.  Also, I now have a test 
result on Solaris 10 for pselect(), and it shows the signal 
handler is called in this case.)

Cheers,

Michael

/* t_pselect.c

   Michael Kerrisk, Aug 2005
*/
#if defined(__linux__) && !defined(NO_SYSCALL)
#define USE_PSELECT_SYSCALL 1
#endif

#ifdef USE_PSELECT_SYSCALL
#define _GNU_SOURCE
#include <syscall.h>
#endif
#include <sys/time.h>
#include <sys/select.h>
#include <signal.h>
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>

#define errMsg(msg)             do { perror(msg); } while (0)

#define errExit(msg)            do { perror(msg); exit(EXIT_FAILURE); \
                                } while (0)

#ifdef USE_PSELECT_SYSCALL
/* Following are for x86 */
#define SYS_pselect6           289
#define SYS_ppoll              290
#endif

sig_atomic_t gotsig = 0;

static void
handler(int sig)
{
    gotsig = 1;
    printf("Caught signal %d\n", sig);  /* UNSAFE (see Section $$$) */
} /* handler */

int
main(int argc, char *argv[])
{
    fd_set readfds;
    int ready, nfds;
    struct timespec timeout;
    struct timespec *pto;
    struct sigaction sa;
#ifdef USE_PSELECT_SYSCALL
    struct {
        sigset_t *sp;
        size_t size;
    } pselarg6;
#endif
    sigset_t empty, all;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s {nsecs|-}\n", argv[1]);
        exit(EXIT_FAILURE);
    }

    setbuf(stdout, NULL);

   /* Block all sigs */

    sigfillset(&all);
    if (sigprocmask(SIG_BLOCK, &all, NULL) == -1) errExit("sigprocmask");

    /* Establish hander for SIGINT */

    sa.sa_handler = handler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = 0;
    if (sigaction(SIGINT, &sa, NULL) == -1) errExit("sigaction");

    /* Timeout is specified in argv[1] */

    if (strcmp(argv[1], "-") == 0) {
        pto = NULL;             /* Infinite timeout */
    } else {
        pto = &timeout;
        timeout.tv_sec = atoi(argv[1]);
        timeout.tv_nsec = 0;
    }

    /* Initialize descriptor set */

    nfds = 1;
    FD_ZERO(&readfds);
    FD_SET(STDIN_FILENO, &readfds);

    sigemptyset(&empty);
    /* sigaddset(&empty, SIGINT); */

    /* Make the call */

#ifdef USE_PSELECT_SYSCALL
#if 1
    printf("Making syscall(SYS_pselect6) call\n");
    pselarg6.sp = &empty;
    pselarg6.size = 8; /* sizeof(sigset_t) */
    ready = syscall(SYS_pselect6, nfds, &readfds, NULL, NULL,
                    pto, &pselarg6);
#else
    /* The following is just some testing cruft */
    {
    struct timeval tv;
    struct timeval *ptv;

    if (strcmp(argv[1], "-") == 0) {
        ptv = NULL;             /* Infinite timeout */
    } else {
        ptv = &tv;
        tv.tv_sec = atoi(argv[1]);
        tv.tv_usec = 0;
    }

    if (sigprocmask(SIG_SETMASK, &empty, NULL) == -1)
        errExit("sigprocmask");

    printf("Making syscall(SYS__newselect) call\n");
    ready = syscall(SYS__newselect, nfds, &readfds, NULL, NULL, ptv);
    printf("!!!! Ignore timeout information printed below !!!!\n");
    }
#endif


#else
    /* This is how a "proper" pselect() call looks on other
       implementations, or when calling the non-atomic glibc version */
    printf("Making simple pselect() call\n");
    ready = pselect(nfds, &readfds, NULL, NULL, pto, &empty);
#endif

    /* Now see what happened */

    if (ready == -1) errMsg("pselect");

    printf("ready = %d\n", ready);
    printf("stdin %s readable\n",
                ready > 0 && FD_ISSET(STDIN_FILENO, &readfds) ?
                        "IS" : "IS NOT");
    printf("Signal handler %s called\n", gotsig ? "WAS" : "WAS NOT");

    if (pto != NULL)
        printf("timeout after select(): %ld.%03ld\n",
               (long) timeout.tv_sec, (long) timeout.tv_nsec / 10000000);
    exit(EXIT_SUCCESS);
} /* main */

-- 
5 GB Mailbox, 50 FreeSMS http://www.gmx.net/de/go/promail
+++ GMX - die erste Adresse für Mail, Message, More +++

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-08-05 11:58                         ` Add pselect, ppoll system calls Michael Kerrisk
@ 2005-08-05 12:49                           ` Michael Kerrisk
  2005-08-25  0:04                             ` David Woodhouse
  0 siblings, 1 reply; 45+ messages in thread
From: Michael Kerrisk @ 2005-08-05 12:49 UTC (permalink / raw)
  To: dwmw2; +Cc: drepper, jakub, torvalds, linux-kernel, akpm, michael.kerrisk

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="us-ascii", Size: 3864 bytes --]

Hi David,

Not sure if your message below was meant as a reply-all or not,
but I've brought it back onto the list.

There are some problems with your test program -- 
it's not actually using pselect() in proper way, which is:

    /* Block signals that will be later unblocked by pselect() */

    sigemptyset(&block);
    sigaddset(&block, SIGINT);
    sigprocmask(SIG_BLOCK, &block, NULL);

    sigemptyset(&set);
    status = my_pselect(1, &s, NULL, NULL, &timeout, &set);

If I change your program to do something like the above, I also
do not see a message from the handler -- i.e., it is not being
called, and I'm pretty sure it should be.

Below, is my modified version of your program -- it uses SIGINT.

Cheers,

Michael

#define _GNU_SOURCE
#include <syscall.h>
#include <unistd.h>
#include <signal.h>
#include <sys/select.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>

#define __NR_pselect6 289

static int my_pselect(int   n,
       fd_set   *readfds,  fd_set  *writefds,  fd_set
       *exceptfds, const struct timespec *timeout, sigset_t *sigmask)
{
        struct {
                sigset_t *set;
                size_t size;
        } __attribute__((packed)) arg6;

        arg6.size = 8;
        arg6.set = sigmask;
        return syscall(__NR_pselect6, n, readfds, writefds,
                exceptfds, timeout, &arg6);
}

static void usr1_handler(int signo)
{
        write(1, "handler called\n", 20);
}


int main(void)
{
        fd_set s;
        int status;
        sigset_t set, block;
        struct timespec timeout;

        timeout.tv_sec = 30;
        timeout.tv_nsec = 0;

        FD_ZERO(&s);
        FD_SET(0, &s);

        signal(SIGINT, usr1_handler);

        sigemptyset(&block);
        sigaddset(&block, SIGINT);
        sigprocmask(SIG_BLOCK, &block, NULL);

        sigemptyset(&set);
        status = my_pselect(1, &s, NULL, NULL, &timeout, &set);
        printf("status=%d\n", status);
        if (status == -1) perror("pselect");
}

--- Weitergeleitete Nachricht ---
Von: David Woodhouse <dwmw2@infradead.org>
An: Michael Kerrisk <mtk-lkml@gmx.net>
Betreff: Re: Add pselect, ppoll system calls.
Datum: Fri, 05 Aug 2005 13:03:14 +0100

This is the test program I used. I send it SIGUSR1 manually.

#define __USE_MISC
#include <unistd.h>
#include <signal.h>
#include <sys/select.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>

#define __NR_pselect6 273

int my_pselect(int   n,   fd_set   *readfds,  fd_set  *writefds,  fd_set
       *exceptfds, const struct timespec *timeout, const sigset_t *sigmask)
{
        struct {
                sigset_t *set;
                size_t size;
        } __attribute__((packed)) arg6;

        arg6.size = 8;
        arg6.set = sigmask;
        return syscall(__NR_pselect6, n, readfds, writefds, exceptfds,
timeout, &arg6);
}

void usr1_handler(int signo)
{
        write(1, "usr1_handler called\n", 20);
}
int main(void)
{
        fd_set s;
        int p;
        char buf[80];
        sigset_t set;
        fcntl(0, F_SETFL, O_NONBLOCK | fcntl(0, F_GETFL));

        FD_ZERO(&s);
        FD_SET(0, &s);

        sigfillset(&set);

        signal(SIGUSR1, usr1_handler);

        printf("I am %d\n", getpid());

        while (1) {
                printf("Block usr1\n");
                fflush(stdout);
                sigaddset(&set, SIGUSR1);
                my_pselect(1, &s, NULL, NULL, NULL, &set);
                read(0, buf, 80);
                printf("Allow usr1\n");
                fflush(stdout);
                sigdelset(&set, SIGUSR1);
                my_pselect(1, &s, NULL, NULL, NULL, &set);
                read(0, buf, 80);
        }

}


-- 
5 GB Mailbox, 50 FreeSMS http://www.gmx.net/de/go/promail
+++ GMX - die erste Adresse für Mail, Message, More +++

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: pselect() modifying timeout
  2005-08-05 10:42                         ` pselect() modifying timeout Michael Kerrisk
@ 2005-08-05 14:50                           ` Ulrich Drepper
  2005-08-05 20:08                             ` Michael Kerrisk
  2005-08-08 11:10                           ` Alan Cox
  1 sibling, 1 reply; 45+ messages in thread
From: Ulrich Drepper @ 2005-08-05 14:50 UTC (permalink / raw)
  To: Michael Kerrisk
  Cc: David Woodhouse, jakub, linux-kernel, bert.hubert, michael.kerrisk, akpm

[-- Attachment #1: Type: text/plain, Size: 495 bytes --]

Michael Kerrisk wrote:
> Please consider making Linux pselect() conform to POSIX on this 
> point.

There is no question the implementation will conform.  But this not
dependent on changing the syscall interface.  We deliberately decided to
not require the kernel interface to be different from select.  The
userlevel code will take care of the difference.  The kernel code is
good as proposed.

-- 
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 251 bytes --]

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: pselect() modifying timeout
  2005-08-05 14:50                           ` Ulrich Drepper
@ 2005-08-05 20:08                             ` Michael Kerrisk
  0 siblings, 0 replies; 45+ messages in thread
From: Michael Kerrisk @ 2005-08-05 20:08 UTC (permalink / raw)
  To: Ulrich Drepper
  Cc: David Woodhouse, jakub, linux-kernel, bert.hubert, akpm, mtk-lkml

> Michael Kerrisk wrote:
> > Please consider making Linux pselect() conform to POSIX on this 
> > point.
> 
> There is no question the implementation will conform.  But this not
> dependent on changing the syscall interface.  We deliberately decided to
> not require the kernel interface to be different from select.  The
> userlevel code will take care of the difference.  The kernel code is good
> as proposed.

Okay -- thanks for the info.

Cheers,

Michael




^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: pselect() modifying timeout
  2005-08-05 10:42                         ` pselect() modifying timeout Michael Kerrisk
  2005-08-05 14:50                           ` Ulrich Drepper
@ 2005-08-08 11:10                           ` Alan Cox
  1 sibling, 0 replies; 45+ messages in thread
From: Alan Cox @ 2005-08-08 11:10 UTC (permalink / raw)
  To: Michael Kerrisk
  Cc: David Woodhouse, drepper, jakub, linux-kernel, bert.hubert,
	michael.kerrisk, akpm

On Gwe, 2005-08-05 at 12:42 +0200, Michael Kerrisk wrote:
> 1. POSIX made the behaviour of pselect() explicit -- the 
>    timeout must not be modified.  The idea was to avoid the 
>    vagueness of the select() specification; it had to be vague 
>    because of existing implementations. By contrast, there were 

Unfortunately it made the wrong choice with pselect, as Linux select
experience has shown the modified timeout is *very* useful data to some
applications. So the patch is better than the POSUX behaviour. The
library can wrap it to provide the poorer standards compliant API while
not stopping people using the better one for Linux specific apps.


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-08-05 12:49                           ` Michael Kerrisk
@ 2005-08-25  0:04                             ` David Woodhouse
  2005-08-25  0:34                               ` Ulrich Drepper
  2005-08-26  6:46                               ` Michael Kerrisk
  0 siblings, 2 replies; 45+ messages in thread
From: David Woodhouse @ 2005-08-25  0:04 UTC (permalink / raw)
  To: Michael Kerrisk
  Cc: drepper, jakub, torvalds, linux-kernel, akpm, michael.kerrisk

(Sorry for delayed response)

On Fri, 2005-08-05 at 14:49 +0200, Michael Kerrisk wrote:
> If I change your program to do something like the above, I also
> do not see a message from the handler -- i.e., it is not being
> called, and I'm pretty sure it should be.

Hm, yes. What happens is we come back out of the select() immediately
because of the pending signal, but on the way back to userspace we put
the old signal mask back... so by the time we check for it, there _is_
no (unblocked) signal pending. 

If it's mandatory that we actually call the signal handler, then we need
to play tricks like sigsuspend() does to leave the old signal mask on
the stack frame. That's a bit painful atm because do_signal is different
between architectures. 

-- 
dwmw2



^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-08-25  0:04                             ` David Woodhouse
@ 2005-08-25  0:34                               ` Ulrich Drepper
  2005-08-26  6:46                               ` Michael Kerrisk
  1 sibling, 0 replies; 45+ messages in thread
From: Ulrich Drepper @ 2005-08-25  0:34 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Michael Kerrisk, jakub, torvalds, linux-kernel, akpm, michael.kerrisk

[-- Attachment #1: Type: text/plain, Size: 826 bytes --]

David Woodhouse wrote:
> If it's mandatory that we actually call the signal handler, then we need
> to play tricks like sigsuspend() does to leave the old signal mask on
> the stack frame. That's a bit painful atm because do_signal is different
> between architectures. 

It is necessary that the handler is called.  This is the purpose of
these interfaces.  If this means more complexity is needed then this is
how the cookie crumbles.  One use case for pselect would be something
like this:


int got_signal;
void sigint_handler(int sig) {
  got_signal = 1;
}

{
  ...
  while (1) {
    if (!got_signal)
      pselect()

    if (got_signal) {
      handle signal
      got_signal = 0;
    }
  }
  ...
}

-- 
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 251 bytes --]

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-08-25  0:04                             ` David Woodhouse
  2005-08-25  0:34                               ` Ulrich Drepper
@ 2005-08-26  6:46                               ` Michael Kerrisk
  1 sibling, 0 replies; 45+ messages in thread
From: Michael Kerrisk @ 2005-08-26  6:46 UTC (permalink / raw)
  To: David Woodhouse
  Cc: drepper, jakub, torvalds, linux-kernel, akpm, michael.kerrisk, mtk-lkml

David,

> On Fri, 2005-08-05 at 14:49 +0200, Michael Kerrisk wrote:
> > If I change your program to do something like the above, I also
> > do not see a message from the handler -- i.e., it is not being
> > called, and I'm pretty sure it should be.
> 
> Hm, yes. What happens is we come back out of the select() immediately
> because of the pending signal, but on the way back to userspace we put the
> old signal mask back... so by the time we check for it, there _is_ no
> (unblocked) signal pending. 
> 
> If it's mandatory that we actually call the signal handler, 

I'm just about to go off on holiday, and don't have a chance to pull up 
all the relevant standards details at them moment.  However, I'm fairly 
sure that the signal handler should be called.  (Try running a modified 
version of my program on Solaris 10 or the Unix-03 conversion of AIX 
(5.3?).)

> then we need to
> play tricks like sigsuspend() does to leave the old signal mask on the
> stack frame. That's a bit painful atm because do_signal is different
> between architectures. 

Yes, I'd say the behaviour should in fact be like what sigsuspend() does.

Cheers,

Michael

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-14 14:07       ` Mattias Engdegård
  2005-06-14 14:16         ` Bernd Petrovitsch
@ 2005-06-14 15:27         ` Valdis.Kletnieks
  1 sibling, 0 replies; 45+ messages in thread
From: Valdis.Kletnieks @ 2005-06-14 15:27 UTC (permalink / raw)
  To: Mattias Engdegård
  Cc: cfriesen, jakub, torvalds, linux-kernel, akpm, dwmw2, drepper

[-- Attachment #1: Type: text/plain, Size: 1389 bytes --]

On Tue, 14 Jun 2005 16:07:54 +0200, =?ISO-8859-1?Q?Mattias Engdeg=E5rd?= said:
> >Monotonic clocks are guaranteed to not go backward. A sudden warp 35
> >seconds into the future when you have timers set for 15 and 20
> >seconds into the future is still ugly....
> 
> I don't have the POSIX specs handy, but I see no reason we could not let
> it use a warpless monotonic clock.

Right - I just mentioned it because "warpless" is an additional constraint
on the clock implementation.

The last time I looked at the NTP code, there was a definite upper limit to
how much warpless slewing of the clock was easily achievable - if you had a
large initial adjustment to make, you still have to either take one large
step, or be prepared to spend literally hours/days slewing the clock.

And then there's always the sysadmin issuing a 'date' command to manually
set the clock.  The kernel *still* needs to be able to handle warps correctly,
because I believe 'date' *is* in Posix? ;)

(The other possible way to handle this is the old IBM S/370 way - where setting
the system clock by warping it was only practical during system boot (as it was
done before any timers got launched), and required an operator intervention to
hit the 'TOD Clock Set Enable' button.  So the operator would look at their
watch, enter a time 15-20 seconds into the future, and whack the button at the
specified time).


[-- Attachment #2: Type: application/pgp-signature, Size: 226 bytes --]

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-14 14:42           ` Mattias Engdegård
@ 2005-06-14 14:54             ` Bernd Petrovitsch
  0 siblings, 0 replies; 45+ messages in thread
From: Bernd Petrovitsch @ 2005-06-14 14:54 UTC (permalink / raw)
  To: Mattias Engdegård; +Cc: Valdis.Kletnieks, cfriesen, linux-kernel, dwmw2

Trimmed Cc: so reduce mail on already mail-overloaded folks.

On Tue, 2005-06-14 at 16:42 +0200, =?ISO-8859-1?Q?Mattias Engdeg=E5rd?=
wrote:
> >> I don't have the POSIX specs handy, but I see no reason we could not let
> >> it use a warpless monotonic clock.
> >
> >You have already one - the uptime of the system.
> 
> For example, yes. 
> 
> >Doing "Relative timeouts" with "gettimeofday()" is a strategic error.
> >Specify the timeout und use (the return value of) times(2) for this.
> 
> Having an interface use absolute timeouts will avoid this strategic error,

If the absolute timeouts means "the uptime in some point in the future",
yes.
If the absolute timeouts means "the time in the real world", then it
*is* the strategic error. This "time in the real world" (if it is usable
on the user interface), is neither monotonic nor warpless.

> simplify common code, and reduce the number of needed syscalls.

	Bernd
-- 
Firmix Software GmbH                   http://www.firmix.at/
mobil: +43 664 4416156                 fax: +43 1 7890849-55
          Embedded Linux Development and Services


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-14 14:16         ` Bernd Petrovitsch
@ 2005-06-14 14:42           ` Mattias Engdegård
  2005-06-14 14:54             ` Bernd Petrovitsch
  0 siblings, 1 reply; 45+ messages in thread
From: Mattias Engdegård @ 2005-06-14 14:42 UTC (permalink / raw)
  To: bernd
  Cc: Valdis.Kletnieks, cfriesen, jakub, torvalds, linux-kernel, akpm,
	dwmw2, drepper

>> I don't have the POSIX specs handy, but I see no reason we could not let
>> it use a warpless monotonic clock.
>
>You have already one - the uptime of the system.

For example, yes. 

>Doing "Relative timeouts" with "gettimeofday()" is a strategic error.
>Specify the timeout und use (the return value of) times(2) for this.

Having an interface use absolute timeouts will avoid this strategic error,
simplify common code, and reduce the number of needed syscalls.

>Use "gettimeofday()" and similar just if (and only if) you communicate
>with the user (read: that is a pure user interface issue).

Of course, but it is not uncommon, perhaps because it's tedious to
convert between clock ticks and a struct timeval.
There is also the problem that the clock tick resolution has historically
been low (commonly 10 ms), which may cause bigger jitter in the longer term.
Also, a clock_t is frequently only 32 bits wide.

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-14 14:07       ` Mattias Engdegård
@ 2005-06-14 14:16         ` Bernd Petrovitsch
  2005-06-14 14:42           ` Mattias Engdegård
  2005-06-14 15:27         ` Valdis.Kletnieks
  1 sibling, 1 reply; 45+ messages in thread
From: Bernd Petrovitsch @ 2005-06-14 14:16 UTC (permalink / raw)
  To: Mattias Engdegård
  Cc: Valdis.Kletnieks, cfriesen, jakub, torvalds, linux-kernel, akpm,
	dwmw2, drepper

On Tue, 2005-06-14 at 16:07 +0200, =?ISO-8859-1?Q?Mattias Engdeg=E5rd?=
wrote:
> >Monotonic clocks are guaranteed to not go backward. A sudden warp 35
> >seconds into the future when you have timers set for 15 and 20
> >seconds into the future is still ugly....
> 
> I don't have the POSIX specs handy, but I see no reason we could not let
> it use a warpless monotonic clock.

You have already one - the uptime of the system.

> The problem of timeouts going wild when time is being warped applies
> to syscalls using relative timeouts as well. Even when a relative
> timeout is wanted, it is usually transformed (via gettimeofday or

Doing "Relative timeouts" with "gettimeofday()" is a strategic error.
Specify the timeout und use (the return value of) times(2) for this.

Use "gettimeofday()" and similar just if (and only if) you communicate
with the user (read: that is a pure user interface issue).

	Bernd
-- 
Firmix Software GmbH                   http://www.firmix.at/
mobil: +43 664 4416156                 fax: +43 1 7890849-55
          Embedded Linux Development and Services


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 20:23     ` Valdis.Kletnieks
@ 2005-06-14 14:07       ` Mattias Engdegård
  2005-06-14 14:16         ` Bernd Petrovitsch
  2005-06-14 15:27         ` Valdis.Kletnieks
  0 siblings, 2 replies; 45+ messages in thread
From: Mattias Engdegård @ 2005-06-14 14:07 UTC (permalink / raw)
  To: Valdis.Kletnieks
  Cc: cfriesen, jakub, torvalds, linux-kernel, akpm, dwmw2, drepper

>Monotonic clocks are guaranteed to not go backward. A sudden warp 35
>seconds into the future when you have timers set for 15 and 20
>seconds into the future is still ugly....

I don't have the POSIX specs handy, but I see no reason we could not let
it use a warpless monotonic clock.

The problem of timeouts going wild when time is being warped applies
to syscalls using relative timeouts as well. Even when a relative
timeout is wanted, it is usually transformed (via gettimeofday or
similar) to an absolute timeout:

   T = some_clock() + dT
   timeout = dT
   loop:
      poll(..., timeout)
      if (poll did not time out):
          now = some_clock()
          timeout = MAX(0, T - now)
          goto loop

This kind of code is very common, because the timeout is usually the
time to some event in the future. If some_clock() is subject to
warping (which is the case when gettimeofday() is used), then you have
the problem again.

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 20:08   ` Mattias Engdegård
@ 2005-06-13 20:23     ` Valdis.Kletnieks
  2005-06-14 14:07       ` Mattias Engdegård
  0 siblings, 1 reply; 45+ messages in thread
From: Valdis.Kletnieks @ 2005-06-13 20:23 UTC (permalink / raw)
  To: Mattias Engdegård
  Cc: cfriesen, jakub, torvalds, linux-kernel, akpm, dwmw2, drepper

[-- Attachment #1: Type: text/plain, Size: 437 bytes --]

On Mon, 13 Jun 2005 22:08:55 +0200, =?ISO-8859-1?Q?Mattias Engdeg=E5rd?= said:
> >Mattias Engdegård wrote:

> >Absolute timestamps are messy though.  How do you deal with system time 
> >changes?
> 
> Monotonic clocks are there for exactly that, no?

Monotonic clocks are guaranteed to not go backward.  A sudden warp 35 seconds
into the future when you have timers set for 15 and 20 seconds into the future
is still ugly....

[-- Attachment #2: Type: application/pgp-signature, Size: 226 bytes --]

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 19:57 ` Chris Friesen
@ 2005-06-13 20:08   ` Mattias Engdegård
  2005-06-13 20:23     ` Valdis.Kletnieks
  0 siblings, 1 reply; 45+ messages in thread
From: Mattias Engdegård @ 2005-06-13 20:08 UTC (permalink / raw)
  To: cfriesen; +Cc: jakub, torvalds, linux-kernel, akpm, dwmw2, drepper

>Mattias Engdegård wrote:
>
>> If we can design ppoll() any way we like, which seems likely, I would
>> prefer having the timeout given as an absolute timestamp.
>
>Absolute timestamps are messy though.  How do you deal with system time 
>changes?

Monotonic clocks are there for exactly that, no?

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
  2005-06-13 19:38 Mattias Engdegård
@ 2005-06-13 19:57 ` Chris Friesen
  2005-06-13 20:08   ` Mattias Engdegård
  0 siblings, 1 reply; 45+ messages in thread
From: Chris Friesen @ 2005-06-13 19:57 UTC (permalink / raw)
  To: Mattias Engdegård
  Cc: Jakub Jelinek, Linus Torvalds, Linux Kernel Mailing List, akpm,
	David Woodhouse, Ulrich Drepper

Mattias Engdegård wrote:

> If we can design ppoll() any way we like, which seems likely, I would
> prefer having the timeout given as an absolute timestamp.

Absolute timestamps are messy though.  How do you deal with system time 
changes?

Chris

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Add pselect, ppoll system calls.
@ 2005-06-13 19:38 Mattias Engdegård
  2005-06-13 19:57 ` Chris Friesen
  0 siblings, 1 reply; 45+ messages in thread
From: Mattias Engdegård @ 2005-06-13 19:38 UTC (permalink / raw)
  To: Jakub Jelinek, Linus Torvalds, Linux Kernel Mailing List, akpm,
	David Woodhouse, Ulrich Drepper

> I think passing const struct timeval * or const struct timespec *
> (depending if you want micro or nanoseconds) is better and what
> other functions use for timeouts, then passing int64_t.

If we can design ppoll() any way we like, which seems likely, I would
prefer having the timeout given as an absolute timestamp. It would
save some gettimeofday() (or clock_gettime()) syscalls and simplify
user code in common cases.

If I'm not mistaken, sem_timedwait() and pthread_cond_timedwait() were
designed to take an absolute timeout for this reason.

^ permalink raw reply	[flat|nested] 45+ messages in thread

end of thread, other threads:[~2005-08-26  6:47 UTC | newest]

Thread overview: 45+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-06-10 22:58 Add pselect, ppoll system calls David Woodhouse
2005-06-12 22:48 ` Alan Cox
2005-06-13  0:26   ` Linus Torvalds
2005-06-13  1:16     ` jnf
2005-06-13  3:26       ` Linus Torvalds
2005-06-13  6:22         ` Ulrich Drepper
2005-06-13  9:16           ` bert hubert
2005-06-13  9:41             ` David Woodhouse
2005-06-13 11:05               ` Christoph Hellwig
2005-06-13 11:14                 ` Jakub Jelinek
2005-06-13 11:24                   ` David Woodhouse
2005-06-13 15:38                     ` Ulrich Drepper
2005-06-13 16:02                       ` David Woodhouse
2005-06-13 16:10                         ` Ulrich Drepper
2005-06-13 16:29                           ` David Woodhouse
2005-06-13 16:31                             ` Ulrich Drepper
2005-06-13 21:58                               ` David Woodhouse
2005-06-13 22:01                                 ` Ulrich Drepper
2005-06-13 16:36                             ` Jakub Jelinek
2005-06-15 11:36                       ` David Woodhouse
2005-08-05 10:42                         ` pselect() modifying timeout Michael Kerrisk
2005-08-05 14:50                           ` Ulrich Drepper
2005-08-05 20:08                             ` Michael Kerrisk
2005-08-08 11:10                           ` Alan Cox
2005-08-05 11:58                         ` Add pselect, ppoll system calls Michael Kerrisk
2005-08-05 12:49                           ` Michael Kerrisk
2005-08-25  0:04                             ` David Woodhouse
2005-08-25  0:34                               ` Ulrich Drepper
2005-08-26  6:46                               ` Michael Kerrisk
2005-06-13 15:35                 ` Ulrich Drepper
2005-06-13  7:23         ` Benjamin Herrenschmidt
2005-06-13  7:29           ` David Woodhouse
2005-06-13 22:56             ` Benjamin Herrenschmidt
2005-06-13 11:15     ` Alan Cox
2005-06-13 11:27       ` David Woodhouse
2005-06-13 15:40       ` Ulrich Drepper
2005-06-13 19:38 Mattias Engdegård
2005-06-13 19:57 ` Chris Friesen
2005-06-13 20:08   ` Mattias Engdegård
2005-06-13 20:23     ` Valdis.Kletnieks
2005-06-14 14:07       ` Mattias Engdegård
2005-06-14 14:16         ` Bernd Petrovitsch
2005-06-14 14:42           ` Mattias Engdegård
2005-06-14 14:54             ` Bernd Petrovitsch
2005-06-14 15:27         ` Valdis.Kletnieks

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).