linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] reintroduce accept4
@ 2008-10-26 16:41 Ulrich Drepper
  2008-10-28  3:41 ` Andrew Morton
                   ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Ulrich Drepper @ 2008-10-26 16:41 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, torvalds

This patch reintroduces accept4, replacing paccept.  It's easy to see that
the patch only removes code and then redirects existing code away from the
removed functions.  Since the paccept code sans signal handling was never
in question I think there is no reason to quarantine the patch first.

I've updated the test program which now looks as follows:

#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>

#ifdef __x86_64__
#define __NR_accept4 288
#define SOCK_CLOEXEC O_CLOEXEC
#elif __i386__
#define SYS_ACCEPT4     18
#define USE_SOCKETCALL 1
#define SOCK_CLOEXEC O_CLOEXEC
#else
#error "define syscall numbers for this architecture"
#endif

#define PORT 57392

static pthread_barrier_t b;

static void *
tf (void *arg)
{
  pthread_barrier_wait (&b);
  int s = socket (AF_INET, SOCK_STREAM, 0);
  struct sockaddr_in sin;
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
  close (s);
  pthread_barrier_wait (&b);

  pthread_barrier_wait (&b);
  s = socket (AF_INET, SOCK_STREAM, 0);
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT + 1);
  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
  close (s);
  return NULL;
}

int
main (void)
{
  alarm (5);

  int status = 0;

  pthread_barrier_init (&b, NULL, 2);

  pthread_t th;
  if (pthread_create (&th, NULL, tf, NULL) != 0)
    {
      puts ("pthread_create failed");
      status = 1;
    }
  else
    {
      int s = socket (AF_INET, SOCK_STREAM, 0);
      int fl = 1;
      setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &fl, sizeof (fl));
      struct sockaddr_in sin;
      sin.sin_family = AF_INET;
      sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
      sin.sin_port = htons (PORT);
      bind (s, (struct sockaddr *) &sin, sizeof (sin));
      listen (s, SOMAXCONN);

      pthread_barrier_wait (&b);

      int s2 = accept (s, NULL, 0);
      if (s2 < 0)
        {
          puts ("accept failed");
          status = 1;
        }
      else
        {
          int fl = fcntl (s2, F_GETFD);
          if ((fl & FD_CLOEXEC) != 0)
            {
              puts ("accept did set CLOEXEC");
              status = 1;
            }

          close (s2);
        }

      close (s);

      pthread_barrier_wait (&b);

      sin.sin_family = AF_INET;
      sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
      sin.sin_port = htons (PORT + 1);
      s = socket (AF_INET, SOCK_STREAM, 0);
      bind (s, (struct sockaddr *) &sin, sizeof (sin));
      listen (s, SOMAXCONN);

      pthread_barrier_wait (&b);

#if USE_SOCKETCALL
      s2 = syscall (__NR_socketcall, SYS_ACCEPT4, s, NULL, 0, SOCK_CLOEXEC);
#else
      s2 = syscall (__NR_accept4, s, NULL, 0, SOCK_CLOEXEC);
#endif
      if (s2 < 0)
        {
          puts ("accept4 failed");
          status = 1;
        }
      else
        {
          int fl = fcntl (s2, F_GETFD);
          if ((fl & FD_CLOEXEC) == 0)
            {
              puts ("accept4 did not set CLOEXEC");
              status = 1;
            }

          close (s2);
        }

      close (s);
    }

  return status;
}


 arch/x86/include/asm/unistd_64.h |    4 -
 include/linux/net.h              |    6 --
 include/linux/syscalls.h         |    3 -
 kernel/sys_ni.c                  |    2 
 net/compat.c                     |   50 ++----------------------
 net/socket.c                     |   80 ++++-----------------------------------
 6 files changed, 21 insertions(+), 124 deletions(-)


Signed-off-by: Ulrich Drepper <drepper@redhat.com>

diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 834b2c1..d2e415e 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
 __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
 #define __NR_timerfd_gettime			287
 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
-#define __NR_paccept				288
-__SYSCALL(__NR_paccept, sys_paccept)
+#define __NR_accept4				288
+__SYSCALL(__NR_accept4, sys_accept4)
 #define __NR_signalfd4				289
 __SYSCALL(__NR_signalfd4, sys_signalfd4)
 #define __NR_eventfd2				290
diff --git a/include/linux/net.h b/include/linux/net.h
index 6dc14a2..4515efa 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -40,7 +40,7 @@
 #define SYS_GETSOCKOPT	15		/* sys_getsockopt(2)		*/
 #define SYS_SENDMSG	16		/* sys_sendmsg(2)		*/
 #define SYS_RECVMSG	17		/* sys_recvmsg(2)		*/
-#define SYS_PACCEPT	18		/* sys_paccept(2)		*/
+#define SYS_ACCEPT4	18		/* sys_accept4(2)		*/
 
 typedef enum {
 	SS_FREE = 0,			/* not allocated		*/
@@ -100,7 +100,7 @@ enum sock_type {
  * remaining bits are used as flags. */
 #define SOCK_TYPE_MASK 0xf
 
-/* Flags for socket, socketpair, paccept */
+/* Flags for socket, socketpair, accept4 */
 #define SOCK_CLOEXEC	O_CLOEXEC
 #ifndef SOCK_NONBLOCK
 #define SOCK_NONBLOCK	O_NONBLOCK
@@ -223,8 +223,6 @@ extern int 	     sock_map_fd(struct socket *sock, int flags);
 extern struct socket *sockfd_lookup(int fd, int *err);
 #define		     sockfd_put(sock) fput(sock->file)
 extern int	     net_ratelimit(void);
-extern long	     do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
-			       int __user *upeer_addrlen, int flags);
 
 #define net_random()		random32()
 #define net_srandom(seed)	srandom32((__force u32)seed)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d6ff145..04fb47b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -410,8 +410,7 @@ asmlinkage long sys_getsockopt(int fd, int level, int optname,
 asmlinkage long sys_bind(int, struct sockaddr __user *, int);
 asmlinkage long sys_connect(int, struct sockaddr __user *, int);
 asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
-asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
-			    const __user sigset_t *, size_t, int);
+asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int);
 asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_send(int, void __user *, size_t, unsigned);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index a77b27b..e14a232 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -31,7 +31,7 @@ cond_syscall(sys_socketpair);
 cond_syscall(sys_bind);
 cond_syscall(sys_listen);
 cond_syscall(sys_accept);
-cond_syscall(sys_paccept);
+cond_syscall(sys_accept4);
 cond_syscall(sys_connect);
 cond_syscall(sys_getsockname);
 cond_syscall(sys_getpeername);
diff --git a/net/compat.c b/net/compat.c
index 67fb6a3..50ce0a9 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -725,7 +725,7 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
 static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
 				AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
 				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
-				AL(6)};
+				AL(4)};
 #undef AL
 
 asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -738,52 +738,13 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns
 	return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
 }
 
-asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
-				   int __user *upeer_addrlen,
-				   const compat_sigset_t __user *sigmask,
-				   compat_size_t sigsetsize, int flags)
-{
-	compat_sigset_t ss32;
-	sigset_t ksigmask, sigsaved;
-	int ret;
-
-	if (sigmask) {
-		if (sigsetsize != sizeof(compat_sigset_t))
-			return -EINVAL;
-		if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
-			return -EFAULT;
-		sigset_from_compat(&ksigmask, &ss32);
-
-		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
-		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
-	}
-
-	ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
-
-	if (ret == -ERESTARTNOHAND) {
-		/*
-		 * Don't restore the signal mask yet. Let do_signal() deliver
-		 * the signal on the way back to userspace, before the signal
-		 * mask is restored.
-		 */
-		if (sigmask) {
-			memcpy(&current->saved_sigmask, &sigsaved,
-			       sizeof(sigsaved));
-			set_restore_sigmask();
-		}
-	} else if (sigmask)
-		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
-	return ret;
-}
-
 asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 {
 	int ret;
 	u32 a[6];
 	u32 a0, a1;
 
-	if (call < SYS_SOCKET || call > SYS_PACCEPT)
+	if (call < SYS_SOCKET || call > SYS_ACCEPT4)
 		return -EINVAL;
 	if (copy_from_user(a, args, nas[call]))
 		return -EFAULT;
@@ -804,7 +765,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 		ret = sys_listen(a0, a1);
 		break;
 	case SYS_ACCEPT:
-		ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
+		ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
 		break;
 	case SYS_GETSOCKNAME:
 		ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2]));
@@ -844,9 +805,8 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 	case SYS_RECVMSG:
 		ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
 		break;
-	case SYS_PACCEPT:
-		ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]),
-					 compat_ptr(a[3]), a[4], a[5]);
+	case SYS_ACCEPT4:
+		ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
 		break;
 	default:
 		ret = -EINVAL;
diff --git a/net/socket.c b/net/socket.c
index 2b7a4b5..8e72806 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1427,8 +1427,8 @@ asmlinkage long sys_listen(int fd, int backlog)
  *	clean when we restucture accept also.
  */
 
-long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
-	       int __user *upeer_addrlen, int flags)
+asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
+			    int __user *upeer_addrlen, int flags)
 {
 	struct socket *sock, *newsock;
 	struct file *newfile;
@@ -1511,66 +1511,10 @@ out_fd:
 	goto out_put;
 }
 
-#if 0
-#ifdef HAVE_SET_RESTORE_SIGMASK
-asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
-			    int __user *upeer_addrlen,
-			    const sigset_t __user *sigmask,
-			    size_t sigsetsize, int flags)
-{
-	sigset_t ksigmask, sigsaved;
-	int ret;
-
-	if (sigmask) {
-		/* XXX: Don't preclude handling different sized sigset_t's.  */
-		if (sigsetsize != sizeof(sigset_t))
-			return -EINVAL;
-		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
-			return -EFAULT;
-
-		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
-		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
-        }
-
-	ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
-
-	if (ret < 0 && signal_pending(current)) {
-		/*
-		 * Don't restore the signal mask yet. Let do_signal() deliver
-		 * the signal on the way back to userspace, before the signal
-		 * mask is restored.
-		 */
-		if (sigmask) {
-			memcpy(&current->saved_sigmask, &sigsaved,
-			       sizeof(sigsaved));
-			set_restore_sigmask();
-		}
-	} else if (sigmask)
-		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
-	return ret;
-}
-#else
-asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
-			    int __user *upeer_addrlen,
-			    const sigset_t __user *sigmask,
-			    size_t sigsetsize, int flags)
-{
-	/* The platform does not support restoring the signal mask in the
-	 * return path.  So we do not allow using paccept() with a signal
-	 * mask.  */
-	if (sigmask)
-		return -EINVAL;
-
-	return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
-}
-#endif
-#endif
-
 asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 			   int __user *upeer_addrlen)
 {
-	return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0);
+	return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
 }
 
 /*
@@ -2097,7 +2041,7 @@ static const unsigned char nargs[19]={
 	AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
 	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
 	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
-	AL(6)
+	AL(4)
 };
 
 #undef AL
@@ -2116,7 +2060,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 	unsigned long a0, a1;
 	int err;
 
-	if (call < 1 || call > SYS_PACCEPT)
+	if (call < 1 || call > SYS_ACCEPT4)
 		return -EINVAL;
 
 	/* copy_from_user should be SMP safe. */
@@ -2144,9 +2088,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 		err = sys_listen(a0, a1);
 		break;
 	case SYS_ACCEPT:
-		err =
-		    do_accept(a0, (struct sockaddr __user *)a1,
-			      (int __user *)a[2], 0);
+		err = sys_accept4(a0, (struct sockaddr __user *)a1,
+				  (int __user *)a[2], 0);
 		break;
 	case SYS_GETSOCKNAME:
 		err =
@@ -2193,12 +2136,9 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 	case SYS_RECVMSG:
 		err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
 		break;
-	case SYS_PACCEPT:
-		err =
-		    sys_paccept(a0, (struct sockaddr __user *)a1,
-			        (int __user *)a[2],
-				(const sigset_t __user *) a[3],
-				a[4], a[5]);
+	case SYS_ACCEPT4:
+		err = sys_accept4(a0, (struct sockaddr __user *)a1,
+				  (int __user *)a[2], a[3]);
 		break;
 	default:
 		err = -EINVAL;

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-10-26 16:41 [PATCH] reintroduce accept4 Ulrich Drepper
@ 2008-10-28  3:41 ` Andrew Morton
  2008-10-28  4:22   ` Ulrich Drepper
  2008-10-28 12:34 ` Michael Kerrisk
  2008-11-13 21:51 ` Michael Kerrisk
  2 siblings, 1 reply; 17+ messages in thread
From: Andrew Morton @ 2008-10-28  3:41 UTC (permalink / raw)
  To: Ulrich Drepper; +Cc: linux-kernel, torvalds, linux-api

(cc linux-api)

(cc linux-arch)

On Sun, 26 Oct 2008 12:41:50 -0400 Ulrich Drepper <drepper@redhat.com> wrote:

> This patch reintroduces accept4, replacing paccept.  It's easy to see that
> the patch only removes code and then redirects existing code away from the
> removed functions.  Since the paccept code sans signal handling was never
> in question I think there is no reason to quarantine the patch first.

I'll confess to not having a clue what's going on here.

What is accept4() and why do I want one?  Sigh.  Hopefully others have
been following more closely and have some context.

> I've updated the test program which now looks as follows:

> 
> #include <fcntl.h>
> #include <pthread.h>
> #include <stdio.h>
> #include <unistd.h>
> #include <netinet/in.h>
> #include <sys/socket.h>
> #include <sys/syscall.h>
> 
> #ifdef __x86_64__
> #define __NR_accept4 288
> #define SOCK_CLOEXEC O_CLOEXEC
> #elif __i386__
> #define SYS_ACCEPT4     18
> #define USE_SOCKETCALL 1
> #define SOCK_CLOEXEC O_CLOEXEC
> #else

Well.  This doesn't actually agree with the kernel patch.

>
> ...
>
>  arch/x86/include/asm/unistd_64.h |    4 -
>  include/linux/net.h              |    6 --
>  include/linux/syscalls.h         |    3 -
>  kernel/sys_ni.c                  |    2 
>  net/compat.c                     |   50 ++----------------------
>  net/socket.c                     |   80 ++++-----------------------------------

I'd suggest that i386 is sufficiently common to warrant its inclusion
in the initial patch.


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-10-28  3:41 ` Andrew Morton
@ 2008-10-28  4:22   ` Ulrich Drepper
  2008-10-28  4:52     ` Andrew Morton
  0 siblings, 1 reply; 17+ messages in thread
From: Ulrich Drepper @ 2008-10-28  4:22 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, torvalds, linux-api

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Andrew Morton wrote:
> I'll confess to not having a clue what's going on here.

It has been discussed at length.  accept created file descriptors and we
need flags for tis.

>> #elif __i386__
>> #define SYS_ACCEPT4     18
>> #define USE_SOCKETCALL 1
>> #define SOCK_CLOEXEC O_CLOEXEC
>> #else
> 
> Well.  This doesn't actually agree with the kernel patch.

What doesn't agree?


> I'd suggest that i386 is sufficiently common to warrant its inclusion
> in the initial patch.

The x86 code is included.  x86 uses socketcall instead of a syscall.

I changed all paccept occurrences in the tree, not just x86-64.

- --
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org

iEYEARECAAYFAkkGk6MACgkQ2ijCOnn/RHSwSgCfeb/PGDCm2qy0ZESqWb8wgyhX
cHoAoIIXvhBjdxcj2gy09eQBzNKOTwCU
=QSFh
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-10-28  4:22   ` Ulrich Drepper
@ 2008-10-28  4:52     ` Andrew Morton
  0 siblings, 0 replies; 17+ messages in thread
From: Andrew Morton @ 2008-10-28  4:52 UTC (permalink / raw)
  To: Ulrich Drepper; +Cc: linux-kernel, torvalds, linux-api, linux-arch

(really does cc linux-arch this time)

On Mon, 27 Oct 2008 21:22:59 -0700 Ulrich Drepper <drepper@redhat.com> wrote:

> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
> 
> Andrew Morton wrote:
> > I'll confess to not having a clue what's going on here.
> 
> It has been discussed at length.

That's of little use to people trying to understand the git commit.

And it's of little use to people who write the documentation. 
Hopefully Michael has been following this closely enough to get by with
what we have here (ie: nothing).

And it's of little use to people who are trying to review this patch
and who haven't followed this alleged lengthy discussion. (ie: me)

And it's of little use to people who write for sites such as lwn.net,
http://kernelnewbies.org/LinuxChanges etc.

This is not some personal weekend hack project.  It is the Linux
kernel, used by millions around the world.

>  accept created file descriptors and we
> need flags for tis.
> 
> >> #elif __i386__
> >> #define SYS_ACCEPT4     18
> >> #define USE_SOCKETCALL 1
> >> #define SOCK_CLOEXEC O_CLOEXEC
> >> #else
> > 
> > Well.  This doesn't actually agree with the kernel patch.
> 
> What doesn't agree?
> 
> 
> > I'd suggest that i386 is sufficiently common to warrant its inclusion
> > in the initial patch.
> 
> The x86 code is included.  x86 uses socketcall instead of a syscall.

<wonders why arch/x86/include/asm/unistd_64.h defines
__ARCH_WANT_SYS_SOCKETCALL but doesn't wire up sys_socketcall()>

> I changed all paccept occurrences in the tree, not just x86-64.

OK, that covered a lot of architectures.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-10-26 16:41 [PATCH] reintroduce accept4 Ulrich Drepper
  2008-10-28  3:41 ` Andrew Morton
@ 2008-10-28 12:34 ` Michael Kerrisk
  2008-11-13 21:51 ` Michael Kerrisk
  2 siblings, 0 replies; 17+ messages in thread
From: Michael Kerrisk @ 2008-10-28 12:34 UTC (permalink / raw)
  To: Ulrich Drepper
  Cc: linux-kernel, akpm, torvalds, linux-api, linux-man,
	Davide Libenzi, netdev, Roland McGrath, Oleg Nesterov,
	Christoph Hellwig, David Miller, Alan Cox, Jakub Jelinek

[linux-api + many indiiduals added to CC]

Ulrich,

On Sun, Oct 26, 2008 at 11:41 AM, Ulrich Drepper <drepper@redhat.com> wrote:
> This patch reintroduces accept4, replacing paccept.  It's easy to see that
> the patch only removes code and then redirects existing code away from the
> removed functions.  Since the paccept code sans signal handling was never
> in question I think there is no reason to quarantine the patch first.

When you (re)submit a patch:

1. Please take the trouble to CC interested parties.  For example, the
people CCed in recent threads that discussed earlier versions of the
the patch.  This is basic LKML basic etiquette, since almost no one
reads all LKML, or reads it hourly.  Doing this allows interested
prties to comment on the patch, and also avoids patches hitting the
kernel "under the radar".

2. Don't assume anyone has cached earlier discussions of the topic.
Write a clear, complete description of the patch tht could b red by
someone new to the topic, something like a summary of
http://lwn.net/Articles/281965/ and http://udrepper.livejournal.com/20407.html

3. Explain what changes have been made from earlier versions of the
patch, and why.
E.g., some discussion thatsummarizes this:
http://thread.gmane.org/gmane.linux.network/106071

4. CC linux-api@vger.kernel.org on patches thatare API changes.  This
requirement was added to SubmitChecklist in the last few weeks.

5. CC me or linux-man@vger.kernel.org, so that something makes its way
into man-pages.

Cheers,

Michael

> I've updated the test program which now looks as follows:
>
> #include <fcntl.h>
> #include <pthread.h>
> #include <stdio.h>
> #include <unistd.h>
> #include <netinet/in.h>
> #include <sys/socket.h>
> #include <sys/syscall.h>
>
> #ifdef __x86_64__
> #define __NR_accept4 288
> #define SOCK_CLOEXEC O_CLOEXEC
> #elif __i386__
> #define SYS_ACCEPT4     18
> #define USE_SOCKETCALL 1
> #define SOCK_CLOEXEC O_CLOEXEC
> #else
> #error "define syscall numbers for this architecture"
> #endif
>
> #define PORT 57392
>
> static pthread_barrier_t b;
>
> static void *
> tf (void *arg)
> {
>  pthread_barrier_wait (&b);
>  int s = socket (AF_INET, SOCK_STREAM, 0);
>  struct sockaddr_in sin;
>  sin.sin_family = AF_INET;
>  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
>  sin.sin_port = htons (PORT);
>  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
>  close (s);
>  pthread_barrier_wait (&b);
>
>  pthread_barrier_wait (&b);
>  s = socket (AF_INET, SOCK_STREAM, 0);
>  sin.sin_family = AF_INET;
>  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
>  sin.sin_port = htons (PORT + 1);
>  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
>  close (s);
>  return NULL;
> }
>
> int
> main (void)
> {
>  alarm (5);
>
>  int status = 0;
>
>  pthread_barrier_init (&b, NULL, 2);
>
>  pthread_t th;
>  if (pthread_create (&th, NULL, tf, NULL) != 0)
>    {
>      puts ("pthread_create failed");
>      status = 1;
>    }
>  else
>    {
>      int s = socket (AF_INET, SOCK_STREAM, 0);
>      int fl = 1;
>      setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &fl, sizeof (fl));
>      struct sockaddr_in sin;
>      sin.sin_family = AF_INET;
>      sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
>      sin.sin_port = htons (PORT);
>      bind (s, (struct sockaddr *) &sin, sizeof (sin));
>      listen (s, SOMAXCONN);
>
>      pthread_barrier_wait (&b);
>
>      int s2 = accept (s, NULL, 0);
>      if (s2 < 0)
>        {
>          puts ("accept failed");
>          status = 1;
>        }
>      else
>        {
>          int fl = fcntl (s2, F_GETFD);
>          if ((fl & FD_CLOEXEC) != 0)
>            {
>              puts ("accept did set CLOEXEC");
>              status = 1;
>            }
>
>          close (s2);
>        }
>
>      close (s);
>
>      pthread_barrier_wait (&b);
>
>      sin.sin_family = AF_INET;
>      sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
>      sin.sin_port = htons (PORT + 1);
>      s = socket (AF_INET, SOCK_STREAM, 0);
>      bind (s, (struct sockaddr *) &sin, sizeof (sin));
>      listen (s, SOMAXCONN);
>
>      pthread_barrier_wait (&b);
>
> #if USE_SOCKETCALL
>      s2 = syscall (__NR_socketcall, SYS_ACCEPT4, s, NULL, 0, SOCK_CLOEXEC);
> #else
>      s2 = syscall (__NR_accept4, s, NULL, 0, SOCK_CLOEXEC);
> #endif
>      if (s2 < 0)
>        {
>          puts ("accept4 failed");
>          status = 1;
>        }
>      else
>        {
>          int fl = fcntl (s2, F_GETFD);
>          if ((fl & FD_CLOEXEC) == 0)
>            {
>              puts ("accept4 did not set CLOEXEC");
>              status = 1;
>            }
>
>          close (s2);
>        }
>
>      close (s);
>    }
>
>  return status;
> }
>
>
>  arch/x86/include/asm/unistd_64.h |    4 -
>  include/linux/net.h              |    6 --
>  include/linux/syscalls.h         |    3 -
>  kernel/sys_ni.c                  |    2
>  net/compat.c                     |   50 ++----------------------
>  net/socket.c                     |   80 ++++-----------------------------------
>  6 files changed, 21 insertions(+), 124 deletions(-)
>
>
> Signed-off-by: Ulrich Drepper <drepper@redhat.com>
>
> diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
> index 834b2c1..d2e415e 100644
> --- a/arch/x86/include/asm/unistd_64.h
> +++ b/arch/x86/include/asm/unistd_64.h
> @@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
>  __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
>  #define __NR_timerfd_gettime                   287
>  __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
> -#define __NR_paccept                           288
> -__SYSCALL(__NR_paccept, sys_paccept)
> +#define __NR_accept4                           288
> +__SYSCALL(__NR_accept4, sys_accept4)
>  #define __NR_signalfd4                         289
>  __SYSCALL(__NR_signalfd4, sys_signalfd4)
>  #define __NR_eventfd2                          290
> diff --git a/include/linux/net.h b/include/linux/net.h
> index 6dc14a2..4515efa 100644
> --- a/include/linux/net.h
> +++ b/include/linux/net.h
> @@ -40,7 +40,7 @@
>  #define SYS_GETSOCKOPT 15              /* sys_getsockopt(2)            */
>  #define SYS_SENDMSG    16              /* sys_sendmsg(2)               */
>  #define SYS_RECVMSG    17              /* sys_recvmsg(2)               */
> -#define SYS_PACCEPT    18              /* sys_paccept(2)               */
> +#define SYS_ACCEPT4    18              /* sys_accept4(2)               */
>
>  typedef enum {
>        SS_FREE = 0,                    /* not allocated                */
> @@ -100,7 +100,7 @@ enum sock_type {
>  * remaining bits are used as flags. */
>  #define SOCK_TYPE_MASK 0xf
>
> -/* Flags for socket, socketpair, paccept */
> +/* Flags for socket, socketpair, accept4 */
>  #define SOCK_CLOEXEC   O_CLOEXEC
>  #ifndef SOCK_NONBLOCK
>  #define SOCK_NONBLOCK  O_NONBLOCK
> @@ -223,8 +223,6 @@ extern int       sock_map_fd(struct socket *sock, int flags);
>  extern struct socket *sockfd_lookup(int fd, int *err);
>  #define                     sockfd_put(sock) fput(sock->file)
>  extern int          net_ratelimit(void);
> -extern long         do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
> -                              int __user *upeer_addrlen, int flags);
>
>  #define net_random()           random32()
>  #define net_srandom(seed)      srandom32((__force u32)seed)
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index d6ff145..04fb47b 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -410,8 +410,7 @@ asmlinkage long sys_getsockopt(int fd, int level, int optname,
>  asmlinkage long sys_bind(int, struct sockaddr __user *, int);
>  asmlinkage long sys_connect(int, struct sockaddr __user *, int);
>  asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
> -asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
> -                           const __user sigset_t *, size_t, int);
> +asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int);
>  asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
>  asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
>  asmlinkage long sys_send(int, void __user *, size_t, unsigned);
> diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
> index a77b27b..e14a232 100644
> --- a/kernel/sys_ni.c
> +++ b/kernel/sys_ni.c
> @@ -31,7 +31,7 @@ cond_syscall(sys_socketpair);
>  cond_syscall(sys_bind);
>  cond_syscall(sys_listen);
>  cond_syscall(sys_accept);
> -cond_syscall(sys_paccept);
> +cond_syscall(sys_accept4);
>  cond_syscall(sys_connect);
>  cond_syscall(sys_getsockname);
>  cond_syscall(sys_getpeername);
> diff --git a/net/compat.c b/net/compat.c
> index 67fb6a3..50ce0a9 100644
> --- a/net/compat.c
> +++ b/net/compat.c
> @@ -725,7 +725,7 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
>  static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
>                                AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
>                                AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
> -                               AL(6)};
> +                               AL(4)};
>  #undef AL
>
>  asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
> @@ -738,52 +738,13 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns
>        return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
>  }
>
> -asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
> -                                  int __user *upeer_addrlen,
> -                                  const compat_sigset_t __user *sigmask,
> -                                  compat_size_t sigsetsize, int flags)
> -{
> -       compat_sigset_t ss32;
> -       sigset_t ksigmask, sigsaved;
> -       int ret;
> -
> -       if (sigmask) {
> -               if (sigsetsize != sizeof(compat_sigset_t))
> -                       return -EINVAL;
> -               if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
> -                       return -EFAULT;
> -               sigset_from_compat(&ksigmask, &ss32);
> -
> -               sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
> -               sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
> -       }
> -
> -       ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
> -
> -       if (ret == -ERESTARTNOHAND) {
> -               /*
> -                * Don't restore the signal mask yet. Let do_signal() deliver
> -                * the signal on the way back to userspace, before the signal
> -                * mask is restored.
> -                */
> -               if (sigmask) {
> -                       memcpy(&current->saved_sigmask, &sigsaved,
> -                              sizeof(sigsaved));
> -                       set_restore_sigmask();
> -               }
> -       } else if (sigmask)
> -               sigprocmask(SIG_SETMASK, &sigsaved, NULL);
> -
> -       return ret;
> -}
> -
>  asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
>  {
>        int ret;
>        u32 a[6];
>        u32 a0, a1;
>
> -       if (call < SYS_SOCKET || call > SYS_PACCEPT)
> +       if (call < SYS_SOCKET || call > SYS_ACCEPT4)
>                return -EINVAL;
>        if (copy_from_user(a, args, nas[call]))
>                return -EFAULT;
> @@ -804,7 +765,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
>                ret = sys_listen(a0, a1);
>                break;
>        case SYS_ACCEPT:
> -               ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
> +               ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
>                break;
>        case SYS_GETSOCKNAME:
>                ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2]));
> @@ -844,9 +805,8 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
>        case SYS_RECVMSG:
>                ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
>                break;
> -       case SYS_PACCEPT:
> -               ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]),
> -                                        compat_ptr(a[3]), a[4], a[5]);
> +       case SYS_ACCEPT4:
> +               ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
>                break;
>        default:
>                ret = -EINVAL;
> diff --git a/net/socket.c b/net/socket.c
> index 2b7a4b5..8e72806 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -1427,8 +1427,8 @@ asmlinkage long sys_listen(int fd, int backlog)
>  *     clean when we restucture accept also.
>  */
>
> -long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
> -              int __user *upeer_addrlen, int flags)
> +asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
> +                           int __user *upeer_addrlen, int flags)
>  {
>        struct socket *sock, *newsock;
>        struct file *newfile;
> @@ -1511,66 +1511,10 @@ out_fd:
>        goto out_put;
>  }
>
> -#if 0
> -#ifdef HAVE_SET_RESTORE_SIGMASK
> -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
> -                           int __user *upeer_addrlen,
> -                           const sigset_t __user *sigmask,
> -                           size_t sigsetsize, int flags)
> -{
> -       sigset_t ksigmask, sigsaved;
> -       int ret;
> -
> -       if (sigmask) {
> -               /* XXX: Don't preclude handling different sized sigset_t's.  */
> -               if (sigsetsize != sizeof(sigset_t))
> -                       return -EINVAL;
> -               if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
> -                       return -EFAULT;
> -
> -               sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
> -               sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
> -        }
> -
> -       ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
> -
> -       if (ret < 0 && signal_pending(current)) {
> -               /*
> -                * Don't restore the signal mask yet. Let do_signal() deliver
> -                * the signal on the way back to userspace, before the signal
> -                * mask is restored.
> -                */
> -               if (sigmask) {
> -                       memcpy(&current->saved_sigmask, &sigsaved,
> -                              sizeof(sigsaved));
> -                       set_restore_sigmask();
> -               }
> -       } else if (sigmask)
> -               sigprocmask(SIG_SETMASK, &sigsaved, NULL);
> -
> -       return ret;
> -}
> -#else
> -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
> -                           int __user *upeer_addrlen,
> -                           const sigset_t __user *sigmask,
> -                           size_t sigsetsize, int flags)
> -{
> -       /* The platform does not support restoring the signal mask in the
> -        * return path.  So we do not allow using paccept() with a signal
> -        * mask.  */
> -       if (sigmask)
> -               return -EINVAL;
> -
> -       return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
> -}
> -#endif
> -#endif
> -
>  asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
>                           int __user *upeer_addrlen)
>  {
> -       return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0);
> +       return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
>  }
>
>  /*
> @@ -2097,7 +2041,7 @@ static const unsigned char nargs[19]={
>        AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
>        AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
>        AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
> -       AL(6)
> +       AL(4)
>  };
>
>  #undef AL
> @@ -2116,7 +2060,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
>        unsigned long a0, a1;
>        int err;
>
> -       if (call < 1 || call > SYS_PACCEPT)
> +       if (call < 1 || call > SYS_ACCEPT4)
>                return -EINVAL;
>
>        /* copy_from_user should be SMP safe. */
> @@ -2144,9 +2088,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
>                err = sys_listen(a0, a1);
>                break;
>        case SYS_ACCEPT:
> -               err =
> -                   do_accept(a0, (struct sockaddr __user *)a1,
> -                             (int __user *)a[2], 0);
> +               err = sys_accept4(a0, (struct sockaddr __user *)a1,
> +                                 (int __user *)a[2], 0);
>                break;
>        case SYS_GETSOCKNAME:
>                err =
> @@ -2193,12 +2136,9 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
>        case SYS_RECVMSG:
>                err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
>                break;
> -       case SYS_PACCEPT:
> -               err =
> -                   sys_paccept(a0, (struct sockaddr __user *)a1,
> -                               (int __user *)a[2],
> -                               (const sigset_t __user *) a[3],
> -                               a[4], a[5]);
> +       case SYS_ACCEPT4:
> +               err = sys_accept4(a0, (struct sockaddr __user *)a1,
> +                                 (int __user *)a[2], a[3]);
>                break;
>        default:
>                err = -EINVAL;
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>



-- 
Michael Kerrisk Linux man-pages maintainer;
http://www.kernel.org/doc/man-pages/ Found a documentation bug?
http://www.kernel.org/doc/man-pages/reporting_bugs.html

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-10-26 16:41 [PATCH] reintroduce accept4 Ulrich Drepper
  2008-10-28  3:41 ` Andrew Morton
  2008-10-28 12:34 ` Michael Kerrisk
@ 2008-11-13 21:51 ` Michael Kerrisk
  2008-11-13 22:02   ` Michael Kerrisk
  2008-11-13 22:05   ` Andrew Morton
  2 siblings, 2 replies; 17+ messages in thread
From: Michael Kerrisk @ 2008-11-13 21:51 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Subrata Modak, linux-arch, Ulrich Drepper, linux-kernel,
	torvalds, linux-api, linux-man, Davide Libenzi, netdev,
	Roland McGrath, Oleg Nesterov, Christoph Hellwig, David Miller,
	Alan Cox, Jakub Jelinek, Michael Kerrisk

Andrew,

On 10/26/08, Ulrich Drepper <drepper@redhat.com> wrote:
> This patch reintroduces accept4, replacing paccept.  It's easy to see that
> the patch only removes code and then redirects existing code away from the
> removed functions.  Since the paccept code sans signal handling was never
> in question I think there is no reason to quarantine the patch first.

I see you accepted this patch into -mm.  I've finally got to looking
at and testing this, so:

Tested-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>

In my tests, everything looks fine.  I'll forward my test program in a
follow-up mail.

I think Ulrich wanted to try to see this patch in for 2.6.28; it's
past the merge window of course, so it's up to you, but I have no
problem with that.  The API is the one that Ulrich initially proposed,
before taking a detour into paccept()
(http://thread.gmane.org/gmane.linux.kernel/671443 ), which I argued
against (http://thread.gmane.org/gmane.linux.kernel/723952,
http://thread.gmane.org/gmane.linux.network/106071/), since I (and
Roland) could see no reason for the added complexity of a signal set
argument (like pselect()/ppoll()/epoll_pwait()).  (In any case, if
someone does come up with a compelling reason to add a sigset
argument, then we can add it via the use of a new flag bit.)

My only argument is with the name of the new sysytem call.

> I've updated the test program which now looks as follows:

(I assume that there had been no testing on x86-32, since, the
__i386__ ifdef's notwithstanding,  the program below can't work on
x86-32 -- sys_socketcall() takes its arguments packaged into an array
on x86-32, not as an inline list.)

Andrew, you noted a lack of explanation accompanying the original
patch.  Here's something to fill the gap, and which may be suitable
for the changelog.

==
Introduce a new accept4() system call.  The addition of this system
call matches analogous changes in 2.6.27 (dup3(), evenfd2(),
signalfd4(), inotify_init1(), epoll_create1(), pipe2()) which added
new system calls that differed from analogous traditional system calls
in adding a flags argument that can be used to access additional
functionality. The accept4() system call is exactly the same as
accept(), except that it adds a flags bit-mask argument.  Two flags
are initially implemented.  (Most of the new system calls in 2.6.27
also had both of these flags.)  SOCK_CLOEXEC causes the close-on-exec
(FD_CLOEXEC) flag to be enabled for the new file descriptor returned
by accept4().  This is a useful security feature to avoid leaking
information in a multithreaded program where one thread is doing an
accept() at the same time as another thread is doing a fork() plus
exec().  (More details here:
http://udrepper.livejournal.com/20407.html "Secure File Descriptor
Handling", Ulrich Drepper)  The other flag is SOCK_NONBLOCK, which
causes the O_NONBLOCK flag to be enabled on the new open file
description created by accept4().  (This flag is merely a convenience,
saving the use of additional calls fcntl(F_GETFL) and fcntl (F_SETFL)
to achieve the same result.)
==

For the curious, some further background on accept4() and the new
system calls in 2.6.27 is at
http://linux-man-pages.blogspot.com/2008/10/recent-changes-in-file-descriptor.html
and http://lwn.net/Articles/281965/)

Cheers,

Michael

> #include <fcntl.h>
> #include <pthread.h>
> #include <stdio.h>
> #include <unistd.h>
> #include <netinet/in.h>
> #include <sys/socket.h>
> #include <sys/syscall.h>
>
> #ifdef __x86_64__
> #define __NR_accept4 288
> #define SOCK_CLOEXEC O_CLOEXEC
> #elif __i386__
> #define SYS_ACCEPT4     18
> #define USE_SOCKETCALL 1
> #define SOCK_CLOEXEC O_CLOEXEC
> #else
> #error "define syscall numbers for this architecture"
> #endif
>
> #define PORT 57392
>
> static pthread_barrier_t b;
>
> static void *
> tf (void *arg)
> {
>   pthread_barrier_wait (&b);
>   int s = socket (AF_INET, SOCK_STREAM, 0);
>   struct sockaddr_in sin;
>   sin.sin_family = AF_INET;
>   sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
>   sin.sin_port = htons (PORT);
>   connect (s, (const struct sockaddr *) &sin, sizeof (sin));
>   close (s);
>   pthread_barrier_wait (&b);
>
>   pthread_barrier_wait (&b);
>   s = socket (AF_INET, SOCK_STREAM, 0);
>   sin.sin_family = AF_INET;
>   sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
>   sin.sin_port = htons (PORT + 1);
>   connect (s, (const struct sockaddr *) &sin, sizeof (sin));
>   close (s);
>   return NULL;
> }
>
> int
> main (void)
> {
>   alarm (5);
>
>   int status = 0;
>
>   pthread_barrier_init (&b, NULL, 2);
>
>   pthread_t th;
>   if (pthread_create (&th, NULL, tf, NULL) != 0)
>     {
>       puts ("pthread_create failed");
>       status = 1;
>     }
>   else
>     {
>       int s = socket (AF_INET, SOCK_STREAM, 0);
>       int fl = 1;
>       setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &fl, sizeof (fl));
>       struct sockaddr_in sin;
>       sin.sin_family = AF_INET;
>       sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
>       sin.sin_port = htons (PORT);
>       bind (s, (struct sockaddr *) &sin, sizeof (sin));
>       listen (s, SOMAXCONN);
>
>       pthread_barrier_wait (&b);
>
>       int s2 = accept (s, NULL, 0);
>       if (s2 < 0)
>         {
>           puts ("accept failed");
>           status = 1;
>         }
>       else
>         {
>           int fl = fcntl (s2, F_GETFD);
>           if ((fl & FD_CLOEXEC) != 0)
>             {
>               puts ("accept did set CLOEXEC");
>               status = 1;
>             }
>
>           close (s2);
>         }
>
>       close (s);
>
>       pthread_barrier_wait (&b);
>
>       sin.sin_family = AF_INET;
>       sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
>       sin.sin_port = htons (PORT + 1);
>       s = socket (AF_INET, SOCK_STREAM, 0);
>       bind (s, (struct sockaddr *) &sin, sizeof (sin));
>       listen (s, SOMAXCONN);
>
>       pthread_barrier_wait (&b);
>
> #if USE_SOCKETCALL
>       s2 = syscall (__NR_socketcall, SYS_ACCEPT4, s, NULL, 0, SOCK_CLOEXEC);
> #else
>       s2 = syscall (__NR_accept4, s, NULL, 0, SOCK_CLOEXEC);
> #endif
>       if (s2 < 0)
>         {
>           puts ("accept4 failed");
>           status = 1;
>         }
>       else
>         {
>           int fl = fcntl (s2, F_GETFD);
>           if ((fl & FD_CLOEXEC) == 0)
>             {
>               puts ("accept4 did not set CLOEXEC");
>               status = 1;
>             }
>
>           close (s2);
>         }
>
>       close (s);
>     }
>
>   return status;
> }
>
>
>  arch/x86/include/asm/unistd_64.h |    4 -
>  include/linux/net.h              |    6 --
>  include/linux/syscalls.h         |    3 -
>  kernel/sys_ni.c                  |    2
>  net/compat.c                     |   50 ++----------------------
>  net/socket.c                     |   80
> ++++-----------------------------------
>  6 files changed, 21 insertions(+), 124 deletions(-)
>
>
> Signed-off-by: Ulrich Drepper <drepper@redhat.com>
>
> diff --git a/arch/x86/include/asm/unistd_64.h
> b/arch/x86/include/asm/unistd_64.h
> index 834b2c1..d2e415e 100644
> --- a/arch/x86/include/asm/unistd_64.h
> +++ b/arch/x86/include/asm/unistd_64.h
> @@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
>  __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
>  #define __NR_timerfd_gettime			287
>  __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
> -#define __NR_paccept				288
> -__SYSCALL(__NR_paccept, sys_paccept)
> +#define __NR_accept4				288
> +__SYSCALL(__NR_accept4, sys_accept4)
>  #define __NR_signalfd4				289
>  __SYSCALL(__NR_signalfd4, sys_signalfd4)
>  #define __NR_eventfd2				290
> diff --git a/include/linux/net.h b/include/linux/net.h
> index 6dc14a2..4515efa 100644
> --- a/include/linux/net.h
> +++ b/include/linux/net.h
> @@ -40,7 +40,7 @@
>  #define SYS_GETSOCKOPT	15		/* sys_getsockopt(2)		*/
>  #define SYS_SENDMSG	16		/* sys_sendmsg(2)		*/
>  #define SYS_RECVMSG	17		/* sys_recvmsg(2)		*/
> -#define SYS_PACCEPT	18		/* sys_paccept(2)		*/
> +#define SYS_ACCEPT4	18		/* sys_accept4(2)		*/
>
>  typedef enum {
>  	SS_FREE = 0,			/* not allocated		*/
> @@ -100,7 +100,7 @@ enum sock_type {
>   * remaining bits are used as flags. */
>  #define SOCK_TYPE_MASK 0xf
>
> -/* Flags for socket, socketpair, paccept */
> +/* Flags for socket, socketpair, accept4 */
>  #define SOCK_CLOEXEC	O_CLOEXEC
>  #ifndef SOCK_NONBLOCK
>  #define SOCK_NONBLOCK	O_NONBLOCK
> @@ -223,8 +223,6 @@ extern int 	     sock_map_fd(struct socket *sock, int
> flags);
>  extern struct socket *sockfd_lookup(int fd, int *err);
>  #define		     sockfd_put(sock) fput(sock->file)
>  extern int	     net_ratelimit(void);
> -extern long	     do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
> -			       int __user *upeer_addrlen, int flags);
>
>  #define net_random()		random32()
>  #define net_srandom(seed)	srandom32((__force u32)seed)
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index d6ff145..04fb47b 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -410,8 +410,7 @@ asmlinkage long sys_getsockopt(int fd, int level, int
> optname,
>  asmlinkage long sys_bind(int, struct sockaddr __user *, int);
>  asmlinkage long sys_connect(int, struct sockaddr __user *, int);
>  asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
> -asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
> -			    const __user sigset_t *, size_t, int);
> +asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *,
> int);
>  asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user
> *);
>  asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user
> *);
>  asmlinkage long sys_send(int, void __user *, size_t, unsigned);
> diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
> index a77b27b..e14a232 100644
> --- a/kernel/sys_ni.c
> +++ b/kernel/sys_ni.c
> @@ -31,7 +31,7 @@ cond_syscall(sys_socketpair);
>  cond_syscall(sys_bind);
>  cond_syscall(sys_listen);
>  cond_syscall(sys_accept);
> -cond_syscall(sys_paccept);
> +cond_syscall(sys_accept4);
>  cond_syscall(sys_connect);
>  cond_syscall(sys_getsockname);
>  cond_syscall(sys_getpeername);
> diff --git a/net/compat.c b/net/compat.c
> index 67fb6a3..50ce0a9 100644
> --- a/net/compat.c
> +++ b/net/compat.c
> @@ -725,7 +725,7 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
>  static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
>  				AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
>  				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
> -				AL(6)};
> +				AL(4)};
>  #undef AL
>
>  asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user
> *msg, unsigned flags)
> @@ -738,52 +738,13 @@ asmlinkage long compat_sys_recvmsg(int fd, struct
> compat_msghdr __user *msg, uns
>  	return sys_recvmsg(fd, (struct msghdr __user *)msg, flags |
> MSG_CMSG_COMPAT);
>  }
>
> -asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user
> *upeer_sockaddr,
> -				   int __user *upeer_addrlen,
> -				   const compat_sigset_t __user *sigmask,
> -				   compat_size_t sigsetsize, int flags)
> -{
> -	compat_sigset_t ss32;
> -	sigset_t ksigmask, sigsaved;
> -	int ret;
> -
> -	if (sigmask) {
> -		if (sigsetsize != sizeof(compat_sigset_t))
> -			return -EINVAL;
> -		if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
> -			return -EFAULT;
> -		sigset_from_compat(&ksigmask, &ss32);
> -
> -		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
> -		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
> -	}
> -
> -	ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
> -
> -	if (ret == -ERESTARTNOHAND) {
> -		/*
> -		 * Don't restore the signal mask yet. Let do_signal() deliver
> -		 * the signal on the way back to userspace, before the signal
> -		 * mask is restored.
> -		 */
> -		if (sigmask) {
> -			memcpy(&current->saved_sigmask, &sigsaved,
> -			       sizeof(sigsaved));
> -			set_restore_sigmask();
> -		}
> -	} else if (sigmask)
> -		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
> -
> -	return ret;
> -}
> -
>  asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
>  {
>  	int ret;
>  	u32 a[6];
>  	u32 a0, a1;
>
> -	if (call < SYS_SOCKET || call > SYS_PACCEPT)
> +	if (call < SYS_SOCKET || call > SYS_ACCEPT4)
>  		return -EINVAL;
>  	if (copy_from_user(a, args, nas[call]))
>  		return -EFAULT;
> @@ -804,7 +765,7 @@ asmlinkage long compat_sys_socketcall(int call, u32
> __user *args)
>  		ret = sys_listen(a0, a1);
>  		break;
>  	case SYS_ACCEPT:
> -		ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
> +		ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
>  		break;
>  	case SYS_GETSOCKNAME:
>  		ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2]));
> @@ -844,9 +805,8 @@ asmlinkage long compat_sys_socketcall(int call, u32
> __user *args)
>  	case SYS_RECVMSG:
>  		ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
>  		break;
> -	case SYS_PACCEPT:
> -		ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]),
> -					 compat_ptr(a[3]), a[4], a[5]);
> +	case SYS_ACCEPT4:
> +		ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
>  		break;
>  	default:
>  		ret = -EINVAL;
> diff --git a/net/socket.c b/net/socket.c
> index 2b7a4b5..8e72806 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -1427,8 +1427,8 @@ asmlinkage long sys_listen(int fd, int backlog)
>   *	clean when we restucture accept also.
>   */
>
> -long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
> -	       int __user *upeer_addrlen, int flags)
> +asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
> +			    int __user *upeer_addrlen, int flags)
>  {
>  	struct socket *sock, *newsock;
>  	struct file *newfile;
> @@ -1511,66 +1511,10 @@ out_fd:
>  	goto out_put;
>  }
>
> -#if 0
> -#ifdef HAVE_SET_RESTORE_SIGMASK
> -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
> -			    int __user *upeer_addrlen,
> -			    const sigset_t __user *sigmask,
> -			    size_t sigsetsize, int flags)
> -{
> -	sigset_t ksigmask, sigsaved;
> -	int ret;
> -
> -	if (sigmask) {
> -		/* XXX: Don't preclude handling different sized sigset_t's.  */
> -		if (sigsetsize != sizeof(sigset_t))
> -			return -EINVAL;
> -		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
> -			return -EFAULT;
> -
> -		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
> -		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
> -        }
> -
> -	ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
> -
> -	if (ret < 0 && signal_pending(current)) {
> -		/*
> -		 * Don't restore the signal mask yet. Let do_signal() deliver
> -		 * the signal on the way back to userspace, before the signal
> -		 * mask is restored.
> -		 */
> -		if (sigmask) {
> -			memcpy(&current->saved_sigmask, &sigsaved,
> -			       sizeof(sigsaved));
> -			set_restore_sigmask();
> -		}
> -	} else if (sigmask)
> -		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
> -
> -	return ret;
> -}
> -#else
> -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
> -			    int __user *upeer_addrlen,
> -			    const sigset_t __user *sigmask,
> -			    size_t sigsetsize, int flags)
> -{
> -	/* The platform does not support restoring the signal mask in the
> -	 * return path.  So we do not allow using paccept() with a signal
> -	 * mask.  */
> -	if (sigmask)
> -		return -EINVAL;
> -
> -	return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
> -}
> -#endif
> -#endif
> -
>  asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
>  			   int __user *upeer_addrlen)
>  {
> -	return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0);
> +	return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
>  }
>
>  /*
> @@ -2097,7 +2041,7 @@ static const unsigned char nargs[19]={
>  	AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
>  	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
>  	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
> -	AL(6)
> +	AL(4)
>  };
>
>  #undef AL
> @@ -2116,7 +2060,7 @@ asmlinkage long sys_socketcall(int call, unsigned long
> __user *args)
>  	unsigned long a0, a1;
>  	int err;
>
> -	if (call < 1 || call > SYS_PACCEPT)
> +	if (call < 1 || call > SYS_ACCEPT4)
>  		return -EINVAL;
>
>  	/* copy_from_user should be SMP safe. */
> @@ -2144,9 +2088,8 @@ asmlinkage long sys_socketcall(int call, unsigned long
> __user *args)
>  		err = sys_listen(a0, a1);
>  		break;
>  	case SYS_ACCEPT:
> -		err =
> -		    do_accept(a0, (struct sockaddr __user *)a1,
> -			      (int __user *)a[2], 0);
> +		err = sys_accept4(a0, (struct sockaddr __user *)a1,
> +				  (int __user *)a[2], 0);
>  		break;
>  	case SYS_GETSOCKNAME:
>  		err =
> @@ -2193,12 +2136,9 @@ asmlinkage long sys_socketcall(int call, unsigned
> long __user *args)
>  	case SYS_RECVMSG:
>  		err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
>  		break;
> -	case SYS_PACCEPT:
> -		err =
> -		    sys_paccept(a0, (struct sockaddr __user *)a1,
> -			        (int __user *)a[2],
> -				(const sigset_t __user *) a[3],
> -				a[4], a[5]);
> +	case SYS_ACCEPT4:
> +		err = sys_accept4(a0, (struct sockaddr __user *)a1,
> +				  (int __user *)a[2], a[3]);
>  		break;
>  	default:
>  		err = -EINVAL;
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>


-- 
Michael Kerrisk Linux man-pages maintainer;
http://www.kernel.org/doc/man-pages/ Found a documentation bug?
http://www.kernel.org/doc/man-pages/reporting_bugs.html

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-11-13 21:51 ` Michael Kerrisk
@ 2008-11-13 22:02   ` Michael Kerrisk
  2008-11-13 22:11     ` Michael Kerrisk
  2008-11-13 22:05   ` Andrew Morton
  1 sibling, 1 reply; 17+ messages in thread
From: Michael Kerrisk @ 2008-11-13 22:02 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Subrata Modak, linux-arch, Ulrich Drepper, linux-kernel,
	torvalds, linux-api, linux-man, Davide Libenzi, netdev,
	Roland McGrath, Oleg Nesterov, Christoph Hellwig, David Miller,
	Alan Cox, Jakub Jelinek

Hit the send button a little early on my last mail.  Just to complete one piece:

> My only argument is with the name of the new sysytem call.

... Each of these new system calls (accept4(), dup3(), evenfd2(),
signalfd4(), inotify_init1(), epoll_create1(), pipe2()) has a name
that's based on the number of arguments it has.  This follows a
convention that was used in a few traditional Unix system calls, e.g.,
wait3(), wait4(), dup2().  However, it's probably a mistake since:

a) The glibc interfaces can have different numbers of arguments from
the system call

b) In the future, we might use the new bits in the flags argument to
signal the presence of additional arguments for the call, in which
case the number in the name no longer matches the number of arguments
in the call signature.

In the end, names like acceptx(), dupx(), ... or acceptfl(), duplf(),
... or somesuch would probably have been better.  But given that we
already added the other system calls, it doesn't seem worth bothering
to change things for accept4().

Cheers,

Michael

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-11-13 21:51 ` Michael Kerrisk
  2008-11-13 22:02   ` Michael Kerrisk
@ 2008-11-13 22:05   ` Andrew Morton
  2008-11-13 22:25     ` Paul Mackerras
  2008-11-14 15:24     ` Michael Kerrisk
  1 sibling, 2 replies; 17+ messages in thread
From: Andrew Morton @ 2008-11-13 22:05 UTC (permalink / raw)
  To: Michael Kerrisk
  Cc: subrata, linux-arch, drepper, linux-kernel, torvalds, linux-api,
	linux-man, davidel, netdev, roland, oleg, hch, davem, alan,
	jakub, mtk.manpages

On Thu, 13 Nov 2008 16:51:56 -0500
"Michael Kerrisk" <mtk.manpages@gmail.com> wrote:

> Andrew,
> 
> On 10/26/08, Ulrich Drepper <drepper@redhat.com> wrote:
> > This patch reintroduces accept4, replacing paccept.  It's easy to see that
> > the patch only removes code and then redirects existing code away from the
> > removed functions.  Since the paccept code sans signal handling was never
> > in question I think there is no reason to quarantine the patch first.
> 
> I see you accepted this patch into -mm.  I've finally got to looking
> at and testing this, so:
> 
> Tested-by: Michael Kerrisk <mtk.manpages@gmail.com>
> Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>

Cool, thanks.

> In my tests, everything looks fine.  I'll forward my test program in a
> follow-up mail.

OK, I'll add that to the changelog as well.

> I think Ulrich wanted to try to see this patch in for 2.6.28; it's
> past the merge window of course, so it's up to you, but I have no
> problem with that.

That's easy - I'll send it to Linus and let him decide ;)

Realistically, this isn't likely to get much third-party testing in -rc
anyway.  Our best defence at this time is careful review and developer
runtime testing, which you've done, thanks.

If it's buggy, we can live with that - fix it later, backport the
fixes.  It's security holes (including DoS ones) which we need to be
most concerned about.

>  The API is the one that Ulrich initially proposed,
> before taking a detour into paccept()
> (http://thread.gmane.org/gmane.linux.kernel/671443 ), which I argued
> against (http://thread.gmane.org/gmane.linux.kernel/723952,
> http://thread.gmane.org/gmane.linux.network/106071/), since I (and
> Roland) could see no reason for the added complexity of a signal set
> argument (like pselect()/ppoll()/epoll_pwait()).  (In any case, if
> someone does come up with a compelling reason to add a sigset
> argument, then we can add it via the use of a new flag bit.)
> 
> My only argument is with the name of the new sysytem call.
> 
> > I've updated the test program which now looks as follows:
> 
> (I assume that there had been no testing on x86-32, since, the
> __i386__ ifdef's notwithstanding,  the program below can't work on
> x86-32 -- sys_socketcall() takes its arguments packaged into an array
> on x86-32, not as an inline list.)
> 
> Andrew, you noted a lack of explanation accompanying the original
> patch.  Here's something to fill the gap, and which may be suitable
> for the changelog.
> 
> ==
> Introduce a new accept4() system call.  The addition of this system
> call matches analogous changes in 2.6.27 (dup3(), evenfd2(),
> signalfd4(), inotify_init1(), epoll_create1(), pipe2()) which added
> new system calls that differed from analogous traditional system calls
> in adding a flags argument that can be used to access additional
> functionality. The accept4() system call is exactly the same as
> accept(), except that it adds a flags bit-mask argument.  Two flags
> are initially implemented.  (Most of the new system calls in 2.6.27
> also had both of these flags.)  SOCK_CLOEXEC causes the close-on-exec
> (FD_CLOEXEC) flag to be enabled for the new file descriptor returned
> by accept4().  This is a useful security feature to avoid leaking
> information in a multithreaded program where one thread is doing an
> accept() at the same time as another thread is doing a fork() plus
> exec().  (More details here:
> http://udrepper.livejournal.com/20407.html "Secure File Descriptor
> Handling", Ulrich Drepper)  The other flag is SOCK_NONBLOCK, which
> causes the O_NONBLOCK flag to be enabled on the new open file
> description created by accept4().  (This flag is merely a convenience,
> saving the use of additional calls fcntl(F_GETFL) and fcntl (F_SETFL)
> to achieve the same result.)

I replaced the existing changelog with the above (plus some paragraph
breaks ;)).  Will add the new test app when it comes along.


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-11-13 22:02   ` Michael Kerrisk
@ 2008-11-13 22:11     ` Michael Kerrisk
  2008-11-13 22:14       ` Michael Kerrisk
  0 siblings, 1 reply; 17+ messages in thread
From: Michael Kerrisk @ 2008-11-13 22:11 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Subrata Modak, linux-arch, Ulrich Drepper, linux-kernel,
	torvalds, linux-api, linux-man, Davide Libenzi, netdev,
	Roland McGrath, Oleg Nesterov, Christoph Hellwig, David Miller,
	Alan Cox, Jakub Jelinek

Here's my test program.  Works on x86-32.  Should work on x86-64, but
I don't have a system to hand to test with.

It tests accept4() with each of the four possible combinations of
SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies that
the appropriate flags are set on the file descriptor/open file
description returned by accept4().

I tested Ulrich's patch in this thread by applying against 2.6.28-rc2,
and it passes according to my test program.

/* test_accept4.c

   Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
        <mtk.manpages@gmail.com>

   Licensed under the GNU GPLv2 or later.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

#define PORT_NUM 33333

#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)

/**********************************************************************/

/* The following is what we need until glibc gets a wrapper for
   accept4() */

/* Flags for socket(), socketpair(), accept4() */
#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC    O_CLOEXEC
#endif
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK   O_NONBLOCK
#endif

#ifdef __x86_64__
#define SYS_accept4 288
#elif __i386__
#define USE_SOCKETCALL 1
#define SYS_ACCEPT4 18
#else
#error "Sorry -- don't know the syscall # on this architecture"
#endif

static int
accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
{
    printf("Calling accept4(): flags = %x", flags);
    if (flags != 0) {
        printf(" (");
        if (flags & SOCK_CLOEXEC)
            printf("SOCK_CLOEXEC");
        if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK))
            printf(" ");
        if (flags & SOCK_NONBLOCK)
            printf("SOCK_NONBLOCK");
        printf(")");
    }
    printf("\n");

#if USE_SOCKETCALL
    long args[6];

    args[0] = fd;
    args[1] = (long) sockaddr;
    args[2] = (long) addrlen;
    args[3] = flags;

    printf("x86-32\n"); /* XXX */
    return syscall(SYS_socketcall, SYS_ACCEPT4, args);
#else
    printf("x86-64\n"); /* XXX */
    return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
#endif
}

/**********************************************************************/


static int
do_test(int lfd, struct sockaddr_in *conn_addr,
        int closeonexec_flag, int nonblock_flag)
{
    int connfd, acceptfd;
    int fdf, flf, fdf_pass, flf_pass;
    struct sockaddr_in claddr;
    socklen_t addrlen;

    printf("=======================================\n");

    connfd = socket(AF_INET, SOCK_STREAM, 0);
    if (connfd == -1)
        die("socket");
    if (connect(connfd, (struct sockaddr *) conn_addr,
                sizeof(struct sockaddr_in)) == -1)
        die("connect");

    addrlen = sizeof(struct sockaddr_in);
    acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
                       closeonexec_flag | nonblock_flag);
    if (acceptfd == -1) {
        perror("accept4()");
        close(connfd);
        return 0;
    }

    fdf = fcntl(acceptfd, F_GETFD);
    if (fdf == -1)
        die("fcntl:F_GETFD");
    fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
               ((closeonexec_flag & SOCK_CLOEXEC) != 0);
    printf("Close-on-exec flag is %sset (%s); ",
            (fdf & FD_CLOEXEC) ? "" : "not ",
            fdf_pass ? "OK" : "failed");

    flf = fcntl(acceptfd, F_GETFL);
    if (flf == -1)
        die("fcntl:F_GETFD");
    flf_pass = ((flf & O_NONBLOCK) != 0) ==
               ((nonblock_flag & SOCK_NONBLOCK) !=0);
    printf("nonblock flag is %sset (%s)\n",
            (flf & O_NONBLOCK) ? "" : "not ",
            flf_pass ? "OK" : "failed");

    close(acceptfd);
    close(connfd);

    printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
    return fdf_pass && flf_pass;
}


static int
create_listening_socket(int port_num)
{
    struct sockaddr_in svaddr;
    int lfd;
    int optval;

    memset(&svaddr, 0, sizeof(struct sockaddr_in));
    svaddr.sin_family = AF_INET;
    svaddr.sin_addr.s_addr = htonl(INADDR_ANY);
    svaddr.sin_port = htons(port_num);

    lfd = socket(AF_INET, SOCK_STREAM, 0);
    if (lfd == -1)
        die("socket");

    optval = 1;
    if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval,
                   sizeof(optval)) == -1)
        die("setsockopt");

    if (bind(lfd, (struct sockaddr *) &svaddr,
             sizeof(struct sockaddr_in)) == -1)
        die("bind");

    if (listen(lfd, 5) == -1)
        die("listen");

    return lfd;
}


int
main(int argc, char *argv[])
{
    struct sockaddr_in conn_addr;
    int lfd;
    int port_num;
    int passed;

    passed = 1;

    port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM;

    memset(&conn_addr, 0, sizeof(struct sockaddr_in));
    conn_addr.sin_family = AF_INET;
    conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    conn_addr.sin_port = htons(port_num);

    lfd = create_listening_socket(port_num);

    if (!do_test(lfd, &conn_addr, 0, 0))
        passed = 0;
    if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0))
        passed = 0;
    if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK))
        passed = 0;
    if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK))
        passed = 0;

    close(lfd);

    exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-11-13 22:11     ` Michael Kerrisk
@ 2008-11-13 22:14       ` Michael Kerrisk
  0 siblings, 0 replies; 17+ messages in thread
From: Michael Kerrisk @ 2008-11-13 22:14 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Subrata Modak, linux-arch, Ulrich Drepper, linux-kernel,
	torvalds, linux-api, linux-man, Davide Libenzi, netdev,
	Roland McGrath, Oleg Nesterov, Christoph Hellwig, David Miller,
	Alan Cox, Jakub Jelinek

Andrew,

There was a couplke of crufty printf()'s in the preceding version that
I didn't notice until just after I hit send.  Paste this version into
the changelog instead.

Cheers,

Michael
/* test_accept4.c

   Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
        <mtk.manpages@gmail.com>

   Licensed under the GNU GPLv2 or later.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

#define PORT_NUM 33333

#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)

/**********************************************************************/

/* The following is what we need until glibc gets a wrapper for
   accept4() */

/* Flags for socket(), socketpair(), accept4() */
#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC    O_CLOEXEC
#endif
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK   O_NONBLOCK
#endif

#ifdef __x86_64__
#define SYS_accept4 288
#elif __i386__
#define USE_SOCKETCALL 1
#define SYS_ACCEPT4 18
#else
#error "Sorry -- don't know the syscall # on this architecture"
#endif

static int
accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
{
    printf("Calling accept4(): flags = %x", flags);
    if (flags != 0) {
        printf(" (");
        if (flags & SOCK_CLOEXEC)
            printf("SOCK_CLOEXEC");
        if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK))
            printf(" ");
        if (flags & SOCK_NONBLOCK)
            printf("SOCK_NONBLOCK");
        printf(")");
    }
    printf("\n");

#if USE_SOCKETCALL
    long args[6];

    args[0] = fd;
    args[1] = (long) sockaddr;
    args[2] = (long) addrlen;
    args[3] = flags;

    return syscall(SYS_socketcall, SYS_ACCEPT4, args);
#else
    return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
#endif
}

/**********************************************************************/


static int
do_test(int lfd, struct sockaddr_in *conn_addr,
        int closeonexec_flag, int nonblock_flag)
{
    int connfd, acceptfd;
    int fdf, flf, fdf_pass, flf_pass;
    struct sockaddr_in claddr;
    socklen_t addrlen;

    printf("=======================================\n");

    connfd = socket(AF_INET, SOCK_STREAM, 0);
    if (connfd == -1)
        die("socket");
    if (connect(connfd, (struct sockaddr *) conn_addr,
                sizeof(struct sockaddr_in)) == -1)
        die("connect");

    addrlen = sizeof(struct sockaddr_in);
    acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
                       closeonexec_flag | nonblock_flag);
    if (acceptfd == -1) {
        perror("accept4()");
        close(connfd);
        return 0;
    }

    fdf = fcntl(acceptfd, F_GETFD);
    if (fdf == -1)
        die("fcntl:F_GETFD");
    fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
               ((closeonexec_flag & SOCK_CLOEXEC) != 0);
    printf("Close-on-exec flag is %sset (%s); ",
            (fdf & FD_CLOEXEC) ? "" : "not ",
            fdf_pass ? "OK" : "failed");

    flf = fcntl(acceptfd, F_GETFL);
    if (flf == -1)
        die("fcntl:F_GETFD");
    flf_pass = ((flf & O_NONBLOCK) != 0) ==
               ((nonblock_flag & SOCK_NONBLOCK) !=0);
    printf("nonblock flag is %sset (%s)\n",
            (flf & O_NONBLOCK) ? "" : "not ",
            flf_pass ? "OK" : "failed");

    close(acceptfd);
    close(connfd);

    printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
    return fdf_pass && flf_pass;
}


static int
create_listening_socket(int port_num)
{
    struct sockaddr_in svaddr;
    int lfd;
    int optval;

    memset(&svaddr, 0, sizeof(struct sockaddr_in));
    svaddr.sin_family = AF_INET;
    svaddr.sin_addr.s_addr = htonl(INADDR_ANY);
    svaddr.sin_port = htons(port_num);

    lfd = socket(AF_INET, SOCK_STREAM, 0);
    if (lfd == -1)
        die("socket");

    optval = 1;
    if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval,
                   sizeof(optval)) == -1)
        die("setsockopt");

    if (bind(lfd, (struct sockaddr *) &svaddr,
             sizeof(struct sockaddr_in)) == -1)
        die("bind");

    if (listen(lfd, 5) == -1)
        die("listen");

    return lfd;
}


int
main(int argc, char *argv[])
{
    struct sockaddr_in conn_addr;
    int lfd;
    int port_num;
    int passed;

    passed = 1;

    port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM;

    memset(&conn_addr, 0, sizeof(struct sockaddr_in));
    conn_addr.sin_family = AF_INET;
    conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    conn_addr.sin_port = htons(port_num);

    lfd = create_listening_socket(port_num);

    if (!do_test(lfd, &conn_addr, 0, 0))
        passed = 0;
    if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0))
        passed = 0;
    if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK))
        passed = 0;
    if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK))
        passed = 0;

    close(lfd);

    exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-11-13 22:05   ` Andrew Morton
@ 2008-11-13 22:25     ` Paul Mackerras
  2008-11-13 22:28       ` Paul Mackerras
  2008-11-14 15:24     ` Michael Kerrisk
  1 sibling, 1 reply; 17+ messages in thread
From: Paul Mackerras @ 2008-11-13 22:25 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Michael Kerrisk, subrata, linux-arch, drepper, linux-kernel,
	torvalds, linux-api, linux-man, davidel, netdev, roland, oleg,
	hch, davem, alan, jakub

Andrew Morton writes:

> > I think Ulrich wanted to try to see this patch in for 2.6.28; it's
> > past the merge window of course, so it's up to you, but I have no
> > problem with that.
> 
> That's easy - I'll send it to Linus and let him decide ;)

Ulrich's patch only updated x86.  If you're going to send it to Linus,
please give us other architecture maintainers a chance to get patches
to you to wire it up on our architectures, and then send Linus the
combined patch.

Paul.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-11-13 22:25     ` Paul Mackerras
@ 2008-11-13 22:28       ` Paul Mackerras
  2008-11-13 22:57         ` Andrew Morton
  0 siblings, 1 reply; 17+ messages in thread
From: Paul Mackerras @ 2008-11-13 22:28 UTC (permalink / raw)
  To: Andrew Morton, Michael Kerrisk, subrata, linux-arch, drepper,
	linux-kernel, torvalds, linux-api, linux-man, davidel, netdev,
	roland, oleg, hch, davem, alan, jakub

I wrote:

> Andrew Morton writes:
> 
> > > I think Ulrich wanted to try to see this patch in for 2.6.28; it's
> > > past the merge window of course, so it's up to you, but I have no
> > > problem with that.
> > 
> > That's easy - I'll send it to Linus and let him decide ;)
> 
> Ulrich's patch only updated x86.  If you're going to send it to Linus,
> please give us other architecture maintainers a chance to get patches
> to you to wire it up on our architectures, and then send Linus the
> combined patch.

Actually, any architecture that uses sys_socketcall should be OK
already, by the looks, and that includes powerpc.

Paul.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-11-13 22:28       ` Paul Mackerras
@ 2008-11-13 22:57         ` Andrew Morton
  2008-11-14  0:07           ` David Miller
                             ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Andrew Morton @ 2008-11-13 22:57 UTC (permalink / raw)
  To: Paul Mackerras
  Cc: mtk.manpages, subrata, linux-arch, drepper, linux-kernel,
	torvalds, linux-api, linux-man, davidel, netdev, roland, oleg,
	hch, davem, alan, jakub

On Fri, 14 Nov 2008 09:28:39 +1100
Paul Mackerras <paulus@samba.org> wrote:

> I wrote:
> 
> > Andrew Morton writes:
> > 
> > > > I think Ulrich wanted to try to see this patch in for 2.6.28; it's
> > > > past the merge window of course, so it's up to you, but I have no
> > > > problem with that.
> > > 
> > > That's easy - I'll send it to Linus and let him decide ;)
> > 
> > Ulrich's patch only updated x86.  If you're going to send it to Linus,
> > please give us other architecture maintainers a chance to get patches
> > to you to wire it up on our architectures, and then send Linus the
> > combined patch.
> 
> Actually, any architecture that uses sys_socketcall should be OK
> already, by the looks, and that includes powerpc.
> 

Here's the latest version, for review-n-test enjoyment:

From: Ulrich Drepper <drepper@redhat.com>

Introduce a new accept4() system call.  The addition of this system call
matches analogous changes in 2.6.27 (dup3(), evenfd2(), signalfd4(),
inotify_init1(), epoll_create1(), pipe2()) which added new system calls
that differed from analogous traditional system calls in adding a flags
argument that can be used to access additional functionality.

The accept4() system call is exactly the same as accept(), except that it
adds a flags bit-mask argument.  Two flags are initially implemented. 
(Most of the new system calls in 2.6.27 also had both of these flags.)
SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled for
the new file descriptor returned by accept4().  This is a useful security
feature to avoid leaking information in a multithreaded program where one
thread is doing an accept() at the same time as another thread is doing a
fork() plus exec().

More details here: http://udrepper.livejournal.com/20407.html "Secure File
Descriptor Handling", Ulrich Drepper) The other flag is SOCK_NONBLOCK,
which causes the O_NONBLOCK flag to be enabled on the new open file
description created by accept4().  (This flag is merely a convenience,
saving the use of additional calls fcntl(F_GETFL) and fcntl (F_SETFL) to
achieve the same result.

Here's my test program.  Works on x86-32.  Should work on x86-64, but
I don't have a system to hand to test with.

It tests accept4() with each of the four possible combinations of
SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies that
the appropriate flags are set on the file descriptor/open file
description returned by accept4().

I tested Ulrich's patch in this thread by applying against 2.6.28-rc2,
and it passes according to my test program.

/* test_accept4.c

   Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
        <mtk.manpages@gmail.com>

   Licensed under the GNU GPLv2 or later.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

#define PORT_NUM 33333

#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)

/**********************************************************************/

/* The following is what we need until glibc gets a wrapper for
   accept4() */

/* Flags for socket(), socketpair(), accept4() */
#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC    O_CLOEXEC
#endif
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK   O_NONBLOCK
#endif

#ifdef __x86_64__
#define SYS_accept4 288
#elif __i386__
#define USE_SOCKETCALL 1
#define SYS_ACCEPT4 18
#else
#error "Sorry -- don't know the syscall # on this architecture"
#endif

static int
accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
{
    printf("Calling accept4(): flags = %x", flags);
    if (flags != 0) {
        printf(" (");
        if (flags & SOCK_CLOEXEC)
            printf("SOCK_CLOEXEC");
        if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK))
            printf(" ");
        if (flags & SOCK_NONBLOCK)
            printf("SOCK_NONBLOCK");
        printf(")");
    }
    printf("\n");

#if USE_SOCKETCALL
    long args[6];

    args[0] = fd;
    args[1] = (long) sockaddr;
    args[2] = (long) addrlen;
    args[3] = flags;

    return syscall(SYS_socketcall, SYS_ACCEPT4, args);
#else
    return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
#endif
}

/**********************************************************************/


static int
do_test(int lfd, struct sockaddr_in *conn_addr,
        int closeonexec_flag, int nonblock_flag)
{
    int connfd, acceptfd;
    int fdf, flf, fdf_pass, flf_pass;
    struct sockaddr_in claddr;
    socklen_t addrlen;

    printf("=======================================\n");

    connfd = socket(AF_INET, SOCK_STREAM, 0);
    if (connfd == -1)
        die("socket");
    if (connect(connfd, (struct sockaddr *) conn_addr,
                sizeof(struct sockaddr_in)) == -1)
        die("connect");

    addrlen = sizeof(struct sockaddr_in);
    acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
                       closeonexec_flag | nonblock_flag);
    if (acceptfd == -1) {
        perror("accept4()");
        close(connfd);
        return 0;
    }

    fdf = fcntl(acceptfd, F_GETFD);
    if (fdf == -1)
        die("fcntl:F_GETFD");
    fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
               ((closeonexec_flag & SOCK_CLOEXEC) != 0);
    printf("Close-on-exec flag is %sset (%s); ",
            (fdf & FD_CLOEXEC) ? "" : "not ",
            fdf_pass ? "OK" : "failed");

    flf = fcntl(acceptfd, F_GETFL);
    if (flf == -1)
        die("fcntl:F_GETFD");
    flf_pass = ((flf & O_NONBLOCK) != 0) ==
               ((nonblock_flag & SOCK_NONBLOCK) !=0);
    printf("nonblock flag is %sset (%s)\n",
            (flf & O_NONBLOCK) ? "" : "not ",
            flf_pass ? "OK" : "failed");

    close(acceptfd);
    close(connfd);

    printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
    return fdf_pass && flf_pass;
}


static int
create_listening_socket(int port_num)
{
    struct sockaddr_in svaddr;
    int lfd;
    int optval;

    memset(&svaddr, 0, sizeof(struct sockaddr_in));
    svaddr.sin_family = AF_INET;
    svaddr.sin_addr.s_addr = htonl(INADDR_ANY);
    svaddr.sin_port = htons(port_num);

    lfd = socket(AF_INET, SOCK_STREAM, 0);
    if (lfd == -1)
        die("socket");

    optval = 1;
    if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval,
                   sizeof(optval)) == -1)
        die("setsockopt");

    if (bind(lfd, (struct sockaddr *) &svaddr,
             sizeof(struct sockaddr_in)) == -1)
        die("bind");

    if (listen(lfd, 5) == -1)
        die("listen");

    return lfd;
}


int
main(int argc, char *argv[])
{
    struct sockaddr_in conn_addr;
    int lfd;
    int port_num;
    int passed;

    passed = 1;

    port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM;

    memset(&conn_addr, 0, sizeof(struct sockaddr_in));
    conn_addr.sin_family = AF_INET;
    conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    conn_addr.sin_port = htons(port_num);

    lfd = create_listening_socket(port_num);

    if (!do_test(lfd, &conn_addr, 0, 0))
        passed = 0;
    if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0))
        passed = 0;
    if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK))
        passed = 0;
    if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK))
        passed = 0;

    close(lfd);

    exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}

[mtk.manpages@gmail.com: rewrote changelog, updated test program]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Tested-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: <linux-api@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 arch/x86/include/asm/unistd_64.h |    4 -
 include/linux/net.h              |    6 --
 include/linux/syscalls.h         |    3 -
 kernel/sys_ni.c                  |    2 
 net/compat.c                     |   50 +-----------------
 net/socket.c                     |   80 +++--------------------------
 6 files changed, 21 insertions(+), 124 deletions(-)

diff -puN arch/x86/include/asm/unistd_64.h~reintroduce-accept4 arch/x86/include/asm/unistd_64.h
--- a/arch/x86/include/asm/unistd_64.h~reintroduce-accept4
+++ a/arch/x86/include/asm/unistd_64.h
@@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
 __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
 #define __NR_timerfd_gettime			287
 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
-#define __NR_paccept				288
-__SYSCALL(__NR_paccept, sys_paccept)
+#define __NR_accept4				288
+__SYSCALL(__NR_accept4, sys_accept4)
 #define __NR_signalfd4				289
 __SYSCALL(__NR_signalfd4, sys_signalfd4)
 #define __NR_eventfd2				290
diff -puN include/linux/net.h~reintroduce-accept4 include/linux/net.h
--- a/include/linux/net.h~reintroduce-accept4
+++ a/include/linux/net.h
@@ -40,7 +40,7 @@
 #define SYS_GETSOCKOPT	15		/* sys_getsockopt(2)		*/
 #define SYS_SENDMSG	16		/* sys_sendmsg(2)		*/
 #define SYS_RECVMSG	17		/* sys_recvmsg(2)		*/
-#define SYS_PACCEPT	18		/* sys_paccept(2)		*/
+#define SYS_ACCEPT4	18		/* sys_accept4(2)		*/
 
 typedef enum {
 	SS_FREE = 0,			/* not allocated		*/
@@ -100,7 +100,7 @@ enum sock_type {
  * remaining bits are used as flags. */
 #define SOCK_TYPE_MASK 0xf
 
-/* Flags for socket, socketpair, paccept */
+/* Flags for socket, socketpair, accept4 */
 #define SOCK_CLOEXEC	O_CLOEXEC
 #ifndef SOCK_NONBLOCK
 #define SOCK_NONBLOCK	O_NONBLOCK
@@ -223,8 +223,6 @@ extern int 	     sock_map_fd(struct sock
 extern struct socket *sockfd_lookup(int fd, int *err);
 #define		     sockfd_put(sock) fput(sock->file)
 extern int	     net_ratelimit(void);
-extern long	     do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
-			       int __user *upeer_addrlen, int flags);
 
 #define net_random()		random32()
 #define net_srandom(seed)	srandom32((__force u32)seed)
diff -puN include/linux/syscalls.h~reintroduce-accept4 include/linux/syscalls.h
--- a/include/linux/syscalls.h~reintroduce-accept4
+++ a/include/linux/syscalls.h
@@ -410,8 +410,7 @@ asmlinkage long sys_getsockopt(int fd, i
 asmlinkage long sys_bind(int, struct sockaddr __user *, int);
 asmlinkage long sys_connect(int, struct sockaddr __user *, int);
 asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
-asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
-			    const __user sigset_t *, size_t, int);
+asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int);
 asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_send(int, void __user *, size_t, unsigned);
diff -puN kernel/sys_ni.c~reintroduce-accept4 kernel/sys_ni.c
--- a/kernel/sys_ni.c~reintroduce-accept4
+++ a/kernel/sys_ni.c
@@ -31,7 +31,7 @@ cond_syscall(sys_socketpair);
 cond_syscall(sys_bind);
 cond_syscall(sys_listen);
 cond_syscall(sys_accept);
-cond_syscall(sys_paccept);
+cond_syscall(sys_accept4);
 cond_syscall(sys_connect);
 cond_syscall(sys_getsockname);
 cond_syscall(sys_getpeername);
diff -puN net/compat.c~reintroduce-accept4 net/compat.c
--- a/net/compat.c~reintroduce-accept4
+++ a/net/compat.c
@@ -725,7 +725,7 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
 static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
 				AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
 				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
-				AL(6)};
+				AL(4)};
 #undef AL
 
 asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -738,52 +738,13 @@ asmlinkage long compat_sys_recvmsg(int f
 	return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
 }
 
-asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
-				   int __user *upeer_addrlen,
-				   const compat_sigset_t __user *sigmask,
-				   compat_size_t sigsetsize, int flags)
-{
-	compat_sigset_t ss32;
-	sigset_t ksigmask, sigsaved;
-	int ret;
-
-	if (sigmask) {
-		if (sigsetsize != sizeof(compat_sigset_t))
-			return -EINVAL;
-		if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
-			return -EFAULT;
-		sigset_from_compat(&ksigmask, &ss32);
-
-		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
-		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
-	}
-
-	ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
-
-	if (ret == -ERESTARTNOHAND) {
-		/*
-		 * Don't restore the signal mask yet. Let do_signal() deliver
-		 * the signal on the way back to userspace, before the signal
-		 * mask is restored.
-		 */
-		if (sigmask) {
-			memcpy(&current->saved_sigmask, &sigsaved,
-			       sizeof(sigsaved));
-			set_restore_sigmask();
-		}
-	} else if (sigmask)
-		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
-	return ret;
-}
-
 asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 {
 	int ret;
 	u32 a[6];
 	u32 a0, a1;
 
-	if (call < SYS_SOCKET || call > SYS_PACCEPT)
+	if (call < SYS_SOCKET || call > SYS_ACCEPT4)
 		return -EINVAL;
 	if (copy_from_user(a, args, nas[call]))
 		return -EFAULT;
@@ -804,7 +765,7 @@ asmlinkage long compat_sys_socketcall(in
 		ret = sys_listen(a0, a1);
 		break;
 	case SYS_ACCEPT:
-		ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
+		ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
 		break;
 	case SYS_GETSOCKNAME:
 		ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2]));
@@ -844,9 +805,8 @@ asmlinkage long compat_sys_socketcall(in
 	case SYS_RECVMSG:
 		ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
 		break;
-	case SYS_PACCEPT:
-		ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]),
-					 compat_ptr(a[3]), a[4], a[5]);
+	case SYS_ACCEPT4:
+		ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
 		break;
 	default:
 		ret = -EINVAL;
diff -puN net/socket.c~reintroduce-accept4 net/socket.c
--- a/net/socket.c~reintroduce-accept4
+++ a/net/socket.c
@@ -1425,8 +1425,8 @@ asmlinkage long sys_listen(int fd, int b
  *	clean when we restucture accept also.
  */
 
-long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
-	       int __user *upeer_addrlen, int flags)
+asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
+			    int __user *upeer_addrlen, int flags)
 {
 	struct socket *sock, *newsock;
 	struct file *newfile;
@@ -1509,66 +1509,10 @@ out_fd:
 	goto out_put;
 }
 
-#if 0
-#ifdef HAVE_SET_RESTORE_SIGMASK
-asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
-			    int __user *upeer_addrlen,
-			    const sigset_t __user *sigmask,
-			    size_t sigsetsize, int flags)
-{
-	sigset_t ksigmask, sigsaved;
-	int ret;
-
-	if (sigmask) {
-		/* XXX: Don't preclude handling different sized sigset_t's.  */
-		if (sigsetsize != sizeof(sigset_t))
-			return -EINVAL;
-		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
-			return -EFAULT;
-
-		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
-		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
-        }
-
-	ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
-
-	if (ret < 0 && signal_pending(current)) {
-		/*
-		 * Don't restore the signal mask yet. Let do_signal() deliver
-		 * the signal on the way back to userspace, before the signal
-		 * mask is restored.
-		 */
-		if (sigmask) {
-			memcpy(&current->saved_sigmask, &sigsaved,
-			       sizeof(sigsaved));
-			set_restore_sigmask();
-		}
-	} else if (sigmask)
-		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
-	return ret;
-}
-#else
-asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
-			    int __user *upeer_addrlen,
-			    const sigset_t __user *sigmask,
-			    size_t sigsetsize, int flags)
-{
-	/* The platform does not support restoring the signal mask in the
-	 * return path.  So we do not allow using paccept() with a signal
-	 * mask.  */
-	if (sigmask)
-		return -EINVAL;
-
-	return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
-}
-#endif
-#endif
-
 asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 			   int __user *upeer_addrlen)
 {
-	return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0);
+	return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
 }
 
 /*
@@ -2095,7 +2039,7 @@ static const unsigned char nargs[19]={
 	AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
 	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
 	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
-	AL(6)
+	AL(4)
 };
 
 #undef AL
@@ -2114,7 +2058,7 @@ asmlinkage long sys_socketcall(int call,
 	unsigned long a0, a1;
 	int err;
 
-	if (call < 1 || call > SYS_PACCEPT)
+	if (call < 1 || call > SYS_ACCEPT4)
 		return -EINVAL;
 
 	/* copy_from_user should be SMP safe. */
@@ -2142,9 +2086,8 @@ asmlinkage long sys_socketcall(int call,
 		err = sys_listen(a0, a1);
 		break;
 	case SYS_ACCEPT:
-		err =
-		    do_accept(a0, (struct sockaddr __user *)a1,
-			      (int __user *)a[2], 0);
+		err = sys_accept4(a0, (struct sockaddr __user *)a1,
+				  (int __user *)a[2], 0);
 		break;
 	case SYS_GETSOCKNAME:
 		err =
@@ -2191,12 +2134,9 @@ asmlinkage long sys_socketcall(int call,
 	case SYS_RECVMSG:
 		err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
 		break;
-	case SYS_PACCEPT:
-		err =
-		    sys_paccept(a0, (struct sockaddr __user *)a1,
-			        (int __user *)a[2],
-				(const sigset_t __user *) a[3],
-				a[4], a[5]);
+	case SYS_ACCEPT4:
+		err = sys_accept4(a0, (struct sockaddr __user *)a1,
+				  (int __user *)a[2], a[3]);
 		break;
 	default:
 		err = -EINVAL;
_


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-11-13 22:57         ` Andrew Morton
@ 2008-11-14  0:07           ` David Miller
  2008-11-14 15:24           ` Michael Kerrisk
  2008-11-14 17:40           ` Michael Kerrisk
  2 siblings, 0 replies; 17+ messages in thread
From: David Miller @ 2008-11-14  0:07 UTC (permalink / raw)
  To: akpm
  Cc: paulus, mtk.manpages, subrata, linux-arch, drepper, linux-kernel,
	torvalds, linux-api, linux-man, davidel, netdev, roland, oleg,
	hch, alan, jakub

From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 13 Nov 2008 14:57:37 -0800

> Here's the latest version, for review-n-test enjoyment:

This adds the sparc syscall hookups.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/unistd_32.h b/arch/sparc/include/asm/unistd_32.h
index 648643a..0d13d2a 100644
--- a/arch/sparc/include/asm/unistd_32.h
+++ b/arch/sparc/include/asm/unistd_32.h
@@ -338,8 +338,9 @@
 #define __NR_dup3		320
 #define __NR_pipe2		321
 #define __NR_inotify_init1	322
+#define __NR_accept4		323
 
-#define NR_SYSCALLS		323
+#define NR_SYSCALLS		324
 
 /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
  * it never had the plain ones and there is no value to adding those
diff --git a/arch/sparc/include/asm/unistd_64.h b/arch/sparc/include/asm/unistd_64.h
index c5cc0e0..fa5d3c0 100644
--- a/arch/sparc/include/asm/unistd_64.h
+++ b/arch/sparc/include/asm/unistd_64.h
@@ -340,8 +340,9 @@
 #define __NR_dup3		320
 #define __NR_pipe2		321
 #define __NR_inotify_init1	322
+#define __NR_accept4		323
 
-#define NR_SYSCALLS		323
+#define NR_SYSCALLS		324
 
 #ifdef __KERNEL__
 #define __ARCH_WANT_IPC_PARSE_VERSION
diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S
index e1b9233..7d08075 100644
--- a/arch/sparc/kernel/systbls.S
+++ b/arch/sparc/kernel/systbls.S
@@ -81,4 +81,4 @@ sys_call_table:
 /*305*/	.long sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
 /*310*/	.long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
 /*315*/	.long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
-/*320*/	.long sys_dup3, sys_pipe2, sys_inotify_init1
+/*320*/	.long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4
diff --git a/arch/sparc64/kernel/sys32.S b/arch/sparc64/kernel/sys32.S
index ade18ba..f061c4d 100644
--- a/arch/sparc64/kernel/sys32.S
+++ b/arch/sparc64/kernel/sys32.S
@@ -150,7 +150,7 @@ sys32_mmap2:
 sys32_socketcall:	/* %o0=call, %o1=args */
 	cmp		%o0, 1
 	bl,pn		%xcc, do_einval
-	 cmp		%o0, 17
+	 cmp		%o0, 18
 	bg,pn		%xcc, do_einval
 	 sub		%o0, 1, %o0
 	sllx		%o0, 5, %o0
@@ -319,6 +319,15 @@ do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int)
 	nop
 	nop
 	nop
+do_sys_accept4: /* sys_accept4(int, struct sockaddr *, int *, int) */
+63:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(sys_accept4), %g1
+64:	lduwa		[%o1 + 0x8] %asi, %o2
+65:	ldswa		[%o1 + 0xc] %asi, %o3
+	jmpl		%g1 + %lo(sys_accept4), %g0
+66:	 lduwa		[%o1 + 0x4] %asi, %o1
+	nop
+	nop
 
 	.section	__ex_table,"a"
 	.align		4
@@ -353,4 +362,6 @@ do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int)
 	.word		57b, __retl_efault, 58b, __retl_efault
 	.word		59b, __retl_efault, 60b, __retl_efault
 	.word		61b, __retl_efault, 62b, __retl_efault
+	.word		63b, __retl_efault, 64b, __retl_efault
+	.word		65b, __retl_efault, 66b, __retl_efault
 	.previous
diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S
index b2fa4c1..9fc78cf 100644
--- a/arch/sparc64/kernel/systbls.S
+++ b/arch/sparc64/kernel/systbls.S
@@ -82,7 +82,7 @@ sys_call_table32:
 	.word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait
 /*310*/	.word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
 	.word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
-/*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1
+/*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4
 
 #endif /* CONFIG_COMPAT */
 
@@ -156,4 +156,4 @@ sys_call_table:
 	.word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
 /*310*/	.word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
 	.word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
-/*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1
+/*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-11-13 22:05   ` Andrew Morton
  2008-11-13 22:25     ` Paul Mackerras
@ 2008-11-14 15:24     ` Michael Kerrisk
  1 sibling, 0 replies; 17+ messages in thread
From: Michael Kerrisk @ 2008-11-14 15:24 UTC (permalink / raw)
  To: Andrew Morton
  Cc: subrata, linux-arch, drepper, linux-kernel, torvalds, linux-api,
	linux-man, davidel, netdev, roland, oleg, hch, davem, alan,
	jakub

>> Andrew, you noted a lack of explanation accompanying the original
>> patch.  Here's something to fill the gap, and which may be suitable
>> for the changelog.
>>
>> ==
>> Introduce a new accept4() system call.  The addition of this system
>> call matches analogous changes in 2.6.27 (dup3(), evenfd2(),
>> signalfd4(), inotify_init1(), epoll_create1(), pipe2()) which added
>> new system calls that differed from analogous traditional system calls
>> in adding a flags argument that can be used to access additional
>> functionality. The accept4() system call is exactly the same as
>> accept(), except that it adds a flags bit-mask argument.  Two flags
>> are initially implemented.  (Most of the new system calls in 2.6.27
>> also had both of these flags.)  SOCK_CLOEXEC causes the close-on-exec
>> (FD_CLOEXEC) flag to be enabled for the new file descriptor returned
>> by accept4().  This is a useful security feature to avoid leaking
>> information in a multithreaded program where one thread is doing an
>> accept() at the same time as another thread is doing a fork() plus
>> exec().  (More details here:
>> http://udrepper.livejournal.com/20407.html "Secure File Descriptor
>> Handling", Ulrich Drepper)  The other flag is SOCK_NONBLOCK, which
>> causes the O_NONBLOCK flag to be enabled on the new open file
>> description created by accept4().  (This flag is merely a convenience,
>> saving the use of additional calls fcntl(F_GETFL) and fcntl (F_SETFL)
>> to achieve the same result.)
>
> I replaced the existing changelog with the above (plus some paragraph
> breaks ;)).  Will add the new test app when it comes along.

Git allows paragraph breaks in changelogs?!  You gotta love technology ;-).

-- 
Michael Kerrisk Linux man-pages maintainer;
http://www.kernel.org/doc/man-pages/ Found a documentation bug?
http://www.kernel.org/doc/man-pages/reporting_bugs.html

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-11-13 22:57         ` Andrew Morton
  2008-11-14  0:07           ` David Miller
@ 2008-11-14 15:24           ` Michael Kerrisk
  2008-11-14 17:40           ` Michael Kerrisk
  2 siblings, 0 replies; 17+ messages in thread
From: Michael Kerrisk @ 2008-11-14 15:24 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Paul Mackerras, subrata, linux-arch, drepper, linux-kernel,
	torvalds, linux-api, linux-man, davidel, netdev, roland, oleg,
	hch, davem, alan, jakub

Andrew,

> From: Ulrich Drepper <drepper@redhat.com>
>
> Introduce a new accept4() system call.  The addition of this system call
> matches analogous changes in 2.6.27 (dup3(), evenfd2(), signalfd4(),
> inotify_init1(), epoll_create1(), pipe2()) which added new system calls
> that differed from analogous traditional system calls in adding a flags
> argument that can be used to access additional functionality.
>
> The accept4() system call is exactly the same as accept(), except that it
> adds a flags bit-mask argument.  Two flags are initially implemented.
> (Most of the new system calls in 2.6.27 also had both of these flags.)

Add a para break here.

> SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled for
> the new file descriptor returned by accept4().  This is a useful security
> feature to avoid leaking information in a multithreaded program where one
> thread is doing an accept() at the same time as another thread is doing a
> fork() plus exec().
>

This para break is in the wrong place.

> More details here: http://udrepper.livejournal.com/20407.html "Secure File
> Descriptor Handling", Ulrich Drepper) The other flag is SOCK_NONBLOCK,

The para break should actually be at the sentence break in the previous line.

Cheers,

Michael

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] reintroduce accept4
  2008-11-13 22:57         ` Andrew Morton
  2008-11-14  0:07           ` David Miller
  2008-11-14 15:24           ` Michael Kerrisk
@ 2008-11-14 17:40           ` Michael Kerrisk
  2 siblings, 0 replies; 17+ messages in thread
From: Michael Kerrisk @ 2008-11-14 17:40 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Paul Mackerras, subrata, linux-arch, drepper, linux-kernel,
	torvalds, linux-api, linux-man, davidel, netdev, roland, oleg,
	hch, davem, alan, jakub

> Here's the latest version, for review-n-test enjoyment:

Andrew,

I made a few small tweaks to the changelog: wording fixes, and para
break fixes; other than that, changed the text to say that I rewrote
(rather than updated) the test program.  Please substitute it with the
following.

Cheers,

Michael

===
From: Ulrich Drepper <drepper@redhat.com>

Introduce a new accept4() system call.  The addition of this system call
matches analogous changes in 2.6.27 (dup3(), evenfd2(), signalfd4(),
inotify_init1(), epoll_create1(), pipe2()) which added new system calls
that differed from analogous traditional system calls in adding a flags
argument that can be used to access additional functionality.

The accept4() system call is exactly the same as accept(), except that
it adds a flags bit-mask argument.  Two flags are initially implemented.
(Most of the new system calls in 2.6.27 also had both of these flags.)

SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled
for the new file descriptor returned by accept4().  This is a useful
security feature to avoid leaking information in a multithreaded
program where one thread is doing an accept() at the same time as
another thread is doing a fork() plus exec().  More details here:
http://udrepper.livejournal.com/20407.html "Secure File Descriptor Handling",
Ulrich Drepper).

The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag
to be enabled on the new open file description created by accept4().
(This flag is merely a convenience, saving the use of additional calls
fcntl(F_GETFL) and fcntl (F_SETFL) to achieve the same result.

Here's a test program.  Works on x86-32.  Should work on x86-64, but
I (mtk) don't have a system to hand to test with.

It tests accept4() with each of the four possible combinations of
SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies
that the appropriate flags are set on the file descriptor/open file
description returned by accept4().

I tested Ulrich's patch in this thread by applying against 2.6.28-rc2,
and it passes according to my test program.

/* test_accept4.c

  Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
       <mtk.manpages@gmail.com>

  Licensed under the GNU GPLv2 or later.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

#define PORT_NUM 33333

#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)

/**********************************************************************/

/* The following is what we need until glibc gets a wrapper for
  accept4() */

/* Flags for socket(), socketpair(), accept4() */
#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC    O_CLOEXEC
#endif
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK   O_NONBLOCK
#endif

#ifdef __x86_64__
#define SYS_accept4 288
#elif __i386__
#define USE_SOCKETCALL 1
#define SYS_ACCEPT4 18
#else
#error "Sorry -- don't know the syscall # on this architecture"
#endif

static int
accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
{
   printf("Calling accept4(): flags = %x", flags);
   if (flags != 0) {
       printf(" (");
       if (flags & SOCK_CLOEXEC)
           printf("SOCK_CLOEXEC");
       if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK))
           printf(" ");
       if (flags & SOCK_NONBLOCK)
           printf("SOCK_NONBLOCK");
       printf(")");
   }
   printf("\n");

#if USE_SOCKETCALL
   long args[6];

   args[0] = fd;
   args[1] = (long) sockaddr;
   args[2] = (long) addrlen;
   args[3] = flags;

   return syscall(SYS_socketcall, SYS_ACCEPT4, args);
#else
   return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
#endif
}

/**********************************************************************/


static int
do_test(int lfd, struct sockaddr_in *conn_addr,
       int closeonexec_flag, int nonblock_flag)
{
   int connfd, acceptfd;
   int fdf, flf, fdf_pass, flf_pass;
   struct sockaddr_in claddr;
   socklen_t addrlen;

   printf("=======================================\n");

   connfd = socket(AF_INET, SOCK_STREAM, 0);
   if (connfd == -1)
       die("socket");
   if (connect(connfd, (struct sockaddr *) conn_addr,
               sizeof(struct sockaddr_in)) == -1)
       die("connect");

   addrlen = sizeof(struct sockaddr_in);
   acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
                      closeonexec_flag | nonblock_flag);
   if (acceptfd == -1) {
       perror("accept4()");
       close(connfd);
       return 0;
   }

   fdf = fcntl(acceptfd, F_GETFD);
   if (fdf == -1)
       die("fcntl:F_GETFD");
   fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
              ((closeonexec_flag & SOCK_CLOEXEC) != 0);
   printf("Close-on-exec flag is %sset (%s); ",
           (fdf & FD_CLOEXEC) ? "" : "not ",
           fdf_pass ? "OK" : "failed");

   flf = fcntl(acceptfd, F_GETFL);
   if (flf == -1)
       die("fcntl:F_GETFD");
   flf_pass = ((flf & O_NONBLOCK) != 0) ==
              ((nonblock_flag & SOCK_NONBLOCK) !=0);
   printf("nonblock flag is %sset (%s)\n",
           (flf & O_NONBLOCK) ? "" : "not ",
           flf_pass ? "OK" : "failed");

   close(acceptfd);
   close(connfd);

   printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
   return fdf_pass && flf_pass;
}


static int
create_listening_socket(int port_num)
{
   struct sockaddr_in svaddr;
   int lfd;
   int optval;

   memset(&svaddr, 0, sizeof(struct sockaddr_in));
   svaddr.sin_family = AF_INET;
   svaddr.sin_addr.s_addr = htonl(INADDR_ANY);
   svaddr.sin_port = htons(port_num);

   lfd = socket(AF_INET, SOCK_STREAM, 0);
   if (lfd == -1)
       die("socket");

   optval = 1;
   if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval,
                  sizeof(optval)) == -1)
       die("setsockopt");

   if (bind(lfd, (struct sockaddr *) &svaddr,
            sizeof(struct sockaddr_in)) == -1)
       die("bind");

   if (listen(lfd, 5) == -1)
       die("listen");

   return lfd;
}


int
main(int argc, char *argv[])
{
   struct sockaddr_in conn_addr;
   int lfd;
   int port_num;
   int passed;

   passed = 1;

   port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM;

   memset(&conn_addr, 0, sizeof(struct sockaddr_in));
   conn_addr.sin_family = AF_INET;
   conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
   conn_addr.sin_port = htons(port_num);

   lfd = create_listening_socket(port_num);

   if (!do_test(lfd, &conn_addr, 0, 0))
       passed = 0;
   if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0))
       passed = 0;
   if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK))
       passed = 0;
   if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK))
       passed = 0;

   close(lfd);

   exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}

[mtk.manpages@gmail.com: rewrote changelog and test program]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Tested-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: <linux-api@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2008-11-14 17:41 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-10-26 16:41 [PATCH] reintroduce accept4 Ulrich Drepper
2008-10-28  3:41 ` Andrew Morton
2008-10-28  4:22   ` Ulrich Drepper
2008-10-28  4:52     ` Andrew Morton
2008-10-28 12:34 ` Michael Kerrisk
2008-11-13 21:51 ` Michael Kerrisk
2008-11-13 22:02   ` Michael Kerrisk
2008-11-13 22:11     ` Michael Kerrisk
2008-11-13 22:14       ` Michael Kerrisk
2008-11-13 22:05   ` Andrew Morton
2008-11-13 22:25     ` Paul Mackerras
2008-11-13 22:28       ` Paul Mackerras
2008-11-13 22:57         ` Andrew Morton
2008-11-14  0:07           ` David Miller
2008-11-14 15:24           ` Michael Kerrisk
2008-11-14 17:40           ` Michael Kerrisk
2008-11-14 15:24     ` Michael Kerrisk

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).