All of lore.kernel.org
 help / color / mirror / Atom feed
From: Waiman Long <longman@redhat.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@kernel.org>,
	Vladimir Davydov <vdavydov.dev@gmail.com>,
	Jonathan Corbet <corbet@lwn.net>,
	Alexey Dobriyan <adobriyan@gmail.com>,
	Ingo Molnar <mingo@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>
Cc: linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, cgroups@vger.kernel.org,
	linux-mm@kvack.org, Waiman Long <longman@redhat.com>
Subject: [RFC PATCH 2/8] memcg, mm: Return ENOMEM or delay if memcg_over_limit
Date: Mon, 17 Aug 2020 10:08:25 -0400	[thread overview]
Message-ID: <20200817140831.30260-3-longman@redhat.com> (raw)
In-Reply-To: <20200817140831.30260-1-longman@redhat.com>

The brk(), mmap(), mlock(), mlockall() and mprotect() syscalls are
modified to check the memcg_over_limit flag and return ENOMEM when it
is set and memory control action is PR_MEMACT_ENOMEM.

In case the action is PR_MEMACT_SLOWDOWN, an artificial delay of 20ms
will be added to slow down the memory allocation syscalls.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 include/linux/sched.h | 16 ++++++++++++++++
 kernel/fork.c         |  1 +
 mm/memcontrol.c       | 25 +++++++++++++++++++++++--
 mm/mlock.c            |  6 ++++++
 mm/mmap.c             | 12 ++++++++++++
 mm/mprotect.c         |  3 +++
 6 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c79d606d27ab..9ec1bd072334 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1477,6 +1477,22 @@ static inline char task_state_to_char(struct task_struct *tsk)
 	return task_index_to_char(task_state_index(tsk));
 }
 
+#ifdef CONFIG_MEMCG
+extern bool mem_cgroup_check_over_limit(void);
+
+static inline bool mem_over_memcg_limit(void)
+{
+	if (READ_ONCE(current->memcg_over_limit))
+		return mem_cgroup_check_over_limit();
+	return false;
+}
+#else
+static inline bool mem_over_memcg_limit(void)
+{
+	return false;
+}
+#endif
+
 /**
  * is_global_init - check if a task structure is init. Since init
  * is free to have sub-threads we need to check tgid.
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d32190861bd..61f9a9e5f857 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -940,6 +940,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 
 #ifdef CONFIG_MEMCG
 	tsk->active_memcg = NULL;
+	tsk->memcg_over_limit = false;
 #endif
 	return tsk;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1106dac024ac..5cad7bb26d13 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2646,7 +2646,9 @@ static bool __mem_cgroup_over_high_action(struct mem_cgroup *memcg, u8 action)
 	if (!mm)
 		return true;	/* No more check is needed */
 
-	current->memcg_over_limit = false;
+	if (READ_ONCE(current->memcg_over_limit))
+		WRITE_ONCE(current->memcg_over_limit, false);
+
 	if ((action == PR_MEMACT_SIGNAL) && !signal)
 		goto out;
 
@@ -2660,7 +2662,11 @@ static bool __mem_cgroup_over_high_action(struct mem_cgroup *memcg, u8 action)
 		WRITE_ONCE(current->memcg_over_limit, true);
 		break;
 	case PR_MEMACT_SLOWDOWN:
-		/* Slow down by yielding the cpu */
+		/*
+		 * Slow down by yielding the cpu & adding delay to
+		 * memory allocation syscalls.
+		 */
+		WRITE_ONCE(current->memcg_over_limit, true);
 		set_tsk_need_resched(current);
 		set_preempt_need_resched();
 		break;
@@ -2694,6 +2700,21 @@ static inline bool mem_cgroup_over_high_action(struct mem_cgroup *memcg)
 	return __mem_cgroup_over_high_action(memcg, action);
 }
 
+/*
+ * Called from memory allocation syscalls.
+ * Return true if ENOMEM should be returned, false otherwise.
+ */
+bool mem_cgroup_check_over_limit(void)
+{
+	u8 action = READ_ONCE(current->memcg_over_high_action);
+
+	if (action == PR_MEMACT_ENOMEM)
+		return true;
+	if (action == PR_MEMACT_SLOWDOWN)
+		msleep(20);	/* Artificial delay of 20ms */
+	return false;
+}
+
 static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		      unsigned int nr_pages)
 {
diff --git a/mm/mlock.c b/mm/mlock.c
index 93ca2bf30b4f..130d4b3fa0f5 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -678,6 +678,9 @@ static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla
 	if (!can_do_mlock())
 		return -EPERM;
 
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	len = PAGE_ALIGN(len + (offset_in_page(start)));
 	start &= PAGE_MASK;
 
@@ -807,6 +810,9 @@ SYSCALL_DEFINE1(mlockall, int, flags)
 	if (!can_do_mlock())
 		return -EPERM;
 
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	lock_limit = rlimit(RLIMIT_MEMLOCK);
 	lock_limit >>= PAGE_SHIFT;
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 40248d84ad5f..873ccf2560a6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -198,6 +198,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	bool downgraded = false;
 	LIST_HEAD(uf);
 
+	/* Too much memory used? */
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
@@ -1407,6 +1411,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 	if (mm->map_count > sysctl_max_map_count)
 		return -ENOMEM;
 
+	/* Too much memory used? */
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	/* Obtain the address to map to. we verify (or select) it and ensure
 	 * that it represents a valid section of the address space.
 	 */
@@ -1557,6 +1565,10 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 	struct file *file = NULL;
 	unsigned long retval;
 
+	/* Too much memory used? */
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	if (!(flags & MAP_ANONYMOUS)) {
 		audit_mmap_fd(fd, flags);
 		file = fget(fd);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ce8b8a5eacbb..b2c0f50bb0a0 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -519,6 +519,9 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
 	const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
 				(prot & PROT_READ);
 
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	start = untagged_addr(start);
 
 	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
-- 
2.18.1


  parent reply	other threads:[~2020-08-17 14:10 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-17 14:08 [RFC PATCH 0/8] memcg: Enable fine-grained per process memory control Waiman Long
2020-08-17 14:08 ` [RFC PATCH 1/8] memcg: Enable fine-grained control of over memory.high action Waiman Long
2020-08-17 14:30   ` Chris Down
2020-08-17 15:38     ` Waiman Long
2020-08-17 16:11       ` Chris Down
2020-08-17 16:44   ` Shakeel Butt
2020-08-17 16:44     ` Shakeel Butt
2020-08-17 16:56     ` Chris Down
2020-08-18 19:12       ` Waiman Long
2020-08-18 19:14     ` Waiman Long
2020-08-17 14:08 ` Waiman Long [this message]
2020-08-17 14:08 ` [RFC PATCH 3/8] memcg: Allow the use of task RSS memory as over-high action trigger Waiman Long
2020-08-17 14:08   ` Waiman Long
2020-08-17 14:08 ` [RFC PATCH 4/8] fs/proc: Support a new procfs memctl file Waiman Long
2020-08-17 20:10   ` kernel test robot
2020-08-17 20:10   ` [RFC PATCH] fs/proc: proc_memctl_operations can be static kernel test robot
2020-08-17 14:08 ` [RFC PATCH 5/8] memcg: Allow direct per-task memory limit checking Waiman Long
2020-08-17 14:08 ` [RFC PATCH 6/8] memcg: Introduce additional memory control slowdown if needed Waiman Long
2020-08-17 14:08 ` [RFC PATCH 7/8] memcg: Enable logging of memory control mitigation action Waiman Long
2020-08-17 14:08   ` Waiman Long
2020-08-17 14:08 ` [RFC PATCH 8/8] memcg: Add over-high action prctl() documentation Waiman Long
2020-08-17 15:26 ` [RFC PATCH 0/8] memcg: Enable fine-grained per process memory control Michal Hocko
2020-08-17 15:55   ` Waiman Long
2020-08-17 15:55     ` Waiman Long
2020-08-17 19:26     ` Michal Hocko
2020-08-18 19:20       ` Waiman Long
2020-08-18 19:20         ` Waiman Long
2020-08-18  9:14 ` peterz
2020-08-18  9:14   ` peterz-wEGCiKHe2LqWVfeAwA7xHQ
2020-08-18  9:26   ` Michal Hocko
2020-08-18  9:26     ` Michal Hocko
2020-08-18  9:59     ` peterz
2020-08-18  9:59       ` peterz-wEGCiKHe2LqWVfeAwA7xHQ
2020-08-18 10:05       ` Michal Hocko
2020-08-18 10:18         ` peterz
2020-08-18 10:30           ` Michal Hocko
2020-08-18 10:36             ` peterz
2020-08-18 13:49           ` Johannes Weiner
2020-08-18 13:49             ` Johannes Weiner
2020-08-21 19:37             ` Peter Zijlstra
2020-08-21 19:37               ` Peter Zijlstra
2020-08-24 16:58               ` Johannes Weiner
2020-09-07 11:47                 ` Chris Down
2020-09-09 11:53                 ` Michal Hocko
2020-08-18 10:17       ` Chris Down
2020-08-18 10:17         ` Chris Down
2020-08-18 10:26         ` peterz
2020-08-18 10:35           ` Chris Down
2020-08-23  2:49         ` Waiman Long
2020-08-23  2:49           ` Waiman Long
2020-08-18  9:27   ` Chris Down
2020-08-18  9:27     ` Chris Down
2020-08-18 10:04     ` peterz
2020-08-18 12:55       ` Matthew Wilcox
2020-08-18 12:55         ` Matthew Wilcox
2020-08-20  6:11         ` Dave Chinner
2020-08-18 19:30     ` Waiman Long
2020-08-18 19:27   ` Waiman Long

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200817140831.30260-3-longman@redhat.com \
    --to=longman@redhat.com \
    --cc=adobriyan@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=cgroups@vger.kernel.org \
    --cc=corbet@lwn.net \
    --cc=hannes@cmpxchg.org \
    --cc=juri.lelli@redhat.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=vdavydov.dev@gmail.com \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.