From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
To: Petr Mladek <pmladek@suse.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>,
	Michal Hocko <mhocko@kernel.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	linux-mm@kvack.org, syzkaller-bugs@googlegroups.com, guro@fb.com,
	kirill.shutemov@linux.intel.com, linux-kernel@vger.kernel.org,
	rientjes@google.com, yang.s@alibaba-inc.com,
	Andrew Morton <akpm@linux-foundation.org>,
	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	syzbot <syzbot+77e6b28a7a7106ad0def@syzkaller.appspotmail.com>
Subject: Re: [PATCH v3] mm: memcontrol: Don't flood OOM messages with no eligible task.
Date: Tue, 23 Oct 2018 19:23:00 +0900
Message-ID: <5251d336-4ad0-ccf0-e31f-35d9c832b0be@i-love.sakura.ne.jp>
In-Reply-To: <20181023082111.edb3ela4mhwaaimi@pathway.suse.cz>

On 2018/10/23 17:21, Petr Mladek wrote:
> On Fri 2018-10-19 09:18:16, Tetsuo Handa wrote:
>> I assumed we would calculate the average dynamically, because the
>> number of messages printed by an OOM event is highly variable (it
>> depends on hardware configuration such as the number of nodes, the
>> number of zones, and how many processes are candidates for an OOM victim).
> 
> Is there any idea how the average length can be counted dynamically?

I don't have one. Maybe sum up the return values of the printk() calls issued from the OOM context?
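(Just to illustrate that idea, a rough sketch, not existing kernel code: pr_oom() and
oom_printed_chars are hypothetical names; the only real API used is vprintk(), which
returns the number of characters emitted.)

/* Hypothetical sketch: accumulate the length of everything printed from
 * the OOM context, so that an average per OOM event could be derived. */
static unsigned long oom_printed_chars;

static int pr_oom(const char *fmt, ...)
{
	va_list args;
	int len;

	va_start(args, fmt);
	len = vprintk(fmt, args);
	va_end(args);
	oom_printed_chars += len;	/* running total for this OOM event */
	return len;
}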



> This reminds me of another problem. We would need to use the same
> decision for all printk() calls that logically belong to each
> other. Otherwise we might get mixed lines that might confuse
> people. I mean that OOM messages might look like:
> 
>   OOM: A
>   OOM: B
>   OOM: C
> 
> If we do not synchronize the ratelimiting, we might see:
> 
>   OOM: A
>   OOM: B
>   OOM: C
>   OOM: B
>   OOM: B
>   OOM: A
>   OOM: C
>   OOM: C

Messages from out_of_memory() are serialized by the oom_lock mutex.
Messages from warn_alloc() are not serialized, and thus can interleave and cause confusion.
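(For illustration, a simplified sketch of that difference, not the exact kernel source;
oc, gfp_mask and order stand for whatever the surrounding caller has at hand.)

/* Page allocator OOM path: the whole OOM report is printed under oom_lock. */
if (mutex_trylock(&oom_lock)) {
	out_of_memory(&oc);
	mutex_unlock(&oom_lock);
}

/* Allocation-failure path: warn_alloc() takes no such lock, so lines from
 * concurrent failing allocations can interleave. */
warn_alloc(gfp_mask, NULL, "page allocation failure: order:%u", order);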



>> I wish that memcg OOM events did not use printk(). Since a memcg OOM is not a
>> shortage of physical memory, we could dynamically allocate physical memory for
>> holding memcg OOM messages and let userspace poll them via some interface.
> 
> Would userspace keep working when the system gets blocked on allocations?

Yes for memcg OOM events, no for global OOM events.
You can try the reproducers shown below in your environment.

Regarding case 2, we can solve the problem by checking whether tsk_is_oom_victim(current) is true.
But regarding case 1, Michal's patch is not sufficient to allow administrators
to enter recovery commands from the console.
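
(A minimal sketch of the kind of bail-out meant for case 2, modelled on the memcg
charge path; the exact placement and surrounding labels are illustrative.)

/* If current was already selected as an OOM victim, do not invoke the
 * OOM killer (and print its report) again; let the charge go through
 * so that the victim can exit and release its memory. */
if (unlikely(tsk_is_oom_victim(current) ||
	     fatal_signal_pending(current) ||
	     current->flags & PF_EXITING))
	goto force;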

---------- Case 1: Flood of memcg OOM events caused by misconfiguration. ----------

#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>

int main(int argc, char *argv[])
{
	FILE *fp;
	const unsigned long size = 1048576 * 200;
	char *buf = malloc(size);

	/* Create a memcg whose limit is half of what we are about to allocate. */
	mkdir("/sys/fs/cgroup/memory/test1", 0755);
	fp = fopen("/sys/fs/cgroup/memory/test1/memory.limit_in_bytes", "w");
	fprintf(fp, "%lu\n", size / 2);
	fclose(fp);
	/* Move this process into that memcg. */
	fp = fopen("/sys/fs/cgroup/memory/test1/tasks", "w");
	fprintf(fp, "%u\n", getpid());
	fclose(fp);
	/* Make this process ineligible for the OOM killer, so that every
	 * charge failure reports "no eligible task" again and again. */
	fp = fopen("/proc/self/oom_score_adj", "w");
	fprintf(fp, "-1000\n");
	fclose(fp);
	/* Fault in twice the memcg limit. */
	fp = fopen("/dev/zero", "r");
	fread(buf, 1, size, fp);
	fclose(fp);
	return 0;
}

---------- Case 2: Flood of memcg OOM events caused by MMF_OOM_SKIP race. ----------

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

#define NUMTHREADS 256
#define MMAPSIZE (4 * 10485760)
#define STACKSIZE 4096
/* Pipe used as a start barrier so that all threads begin eating memory at once. */
static int pipe_fd[2] = { EOF, EOF };
static int memory_eater(void *unused)
{
	int fd = open("/dev/zero", O_RDONLY);
	char *buf = mmap(NULL, MMAPSIZE, PROT_WRITE | PROT_READ,
			 MAP_ANONYMOUS | MAP_SHARED, EOF, 0);

	/* Block until main() closes the write end (read() returns 0 on EOF). */
	read(pipe_fd[0], buf, 1);
	/* Fault in MMAPSIZE bytes; in aggregate this far exceeds the memcg limit. */
	read(fd, buf, MMAPSIZE);
	pause();
	return 0;
}
int main(int argc, char *argv[])
{
	int i;
	char *stack;
	FILE *fp;
	const unsigned long size = 1048576 * 200;

	/* Create a memcg with a 200MB limit and move ourselves into it. */
	mkdir("/sys/fs/cgroup/memory/test1", 0755);
	fp = fopen("/sys/fs/cgroup/memory/test1/memory.limit_in_bytes", "w");
	fprintf(fp, "%lu\n", size);
	fclose(fp);
	fp = fopen("/sys/fs/cgroup/memory/test1/tasks", "w");
	fprintf(fp, "%u\n", getpid());
	fclose(fp);
	/* Drop privileges so that the threads remain eligible OOM victims. */
	if (setgid(-2) || setuid(-2))
		return 1;
	/* Create the start-barrier pipe used by memory_eater(). */
	if (pipe(pipe_fd))
		return 1;
	stack = mmap(NULL, STACKSIZE * NUMTHREADS, PROT_WRITE | PROT_READ,
		     MAP_ANONYMOUS | MAP_SHARED, EOF, 0);
	/* Spawn threads sharing the VM; each gets a small slice as its stack. */
	for (i = 0; i < NUMTHREADS; i++)
		if (clone(memory_eater, stack + (i + 1) * STACKSIZE,
			  CLONE_SIGHAND | CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES, NULL) == -1)
			break;
	sleep(1);
	/* Release the barrier: all threads start touching memory simultaneously. */
	close(pipe_fd[1]);
	pause();
	return 0;
}

