mm-commits Archive on lore.kernel.org
 help / color / Atom feed
From: Andrew Morton <akpm@linux-foundation.org>
To: akpm@linux-foundation.org, chris@chrisdown.name, guro@fb.com,
	hannes@cmpxchg.org, linux-mm@kvack.org, mhocko@kernel.org,
	mm-commits@vger.kernel.org, natechancellor@gmail.com,
	stable@vger.kernel.org, tj@kernel.org,
	torvalds@linux-foundation.org
Subject: [patch 05/10] mm, memcg: throttle allocators based on ancestral memory.high
Date: Sat, 21 Mar 2020 18:22:23 -0700
Message-ID: <20200322012223.ExM_gA7dC%akpm@linux-foundation.org> (raw)
In-Reply-To: <20200321181954.c0564dfd5514cd742b534884@linux-foundation.org>

From: Chris Down <chris@chrisdown.name>
Subject: mm, memcg: throttle allocators based on ancestral memory.high

Prior to this commit, we only directly check the affected cgroup's
memory.high against its usage.  However, it's possible that we are being
reclaimed as a result of hitting an ancestor memory.high and should be
penalised based on that, instead.

This patch changes memory.high overage throttling to use the largest
overage in its ancestors when considering how many penalty jiffies to
charge.  This makes sure that we penalise poorly behaving cgroups in the
same way regardless of at what level of the hierarchy memory.high was
breached.

Link: http://lkml.kernel.org/r/8cd132f84bd7e16cdb8fde3378cdbf05ba00d387.1584036142.git.chris@chrisdown.name
Fixes: 0e4b01df8659 ("mm, memcg: throttle allocators when failing reclaim over memory.high")
Signed-off-by: Chris Down <chris@chrisdown.name>
Reported-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Nathan Chancellor <natechancellor@gmail.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: <stable@vger.kernel.org>	[5.4.x+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 mm/memcontrol.c |   93 ++++++++++++++++++++++++++++------------------
 1 file changed, 58 insertions(+), 35 deletions(-)

--- a/mm/memcontrol.c~mm-memcg-throttle-allocators-based-on-ancestral-memoryhigh
+++ a/mm/memcontrol.c
@@ -2297,28 +2297,41 @@ static void high_work_func(struct work_s
  #define MEMCG_DELAY_SCALING_SHIFT 14
 
 /*
- * Scheduled by try_charge() to be executed from the userland return path
- * and reclaims memory over the high limit.
+ * Get the number of jiffies that we should penalise a mischievous cgroup which
+ * is exceeding its memory.high by checking both it and its ancestors.
  */
-void mem_cgroup_handle_over_high(void)
+static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
+					  unsigned int nr_pages)
 {
-	unsigned long usage, high, clamped_high;
-	unsigned long pflags;
-	unsigned long penalty_jiffies, overage;
-	unsigned int nr_pages = current->memcg_nr_pages_over_high;
-	struct mem_cgroup *memcg;
+	unsigned long penalty_jiffies;
+	u64 max_overage = 0;
 
-	if (likely(!nr_pages))
-		return;
+	do {
+		unsigned long usage, high;
+		u64 overage;
+
+		usage = page_counter_read(&memcg->memory);
+		high = READ_ONCE(memcg->high);
+
+		/*
+		 * Prevent division by 0 in overage calculation by acting as if
+		 * it was a threshold of 1 page
+		 */
+		high = max(high, 1UL);
+
+		overage = usage - high;
+		overage <<= MEMCG_DELAY_PRECISION_SHIFT;
+		overage = div64_u64(overage, high);
+
+		if (overage > max_overage)
+			max_overage = overage;
+	} while ((memcg = parent_mem_cgroup(memcg)) &&
+		 !mem_cgroup_is_root(memcg));
 
-	memcg = get_mem_cgroup_from_mm(current->mm);
-	reclaim_high(memcg, nr_pages, GFP_KERNEL);
-	current->memcg_nr_pages_over_high = 0;
+	if (!max_overage)
+		return 0;
 
 	/*
-	 * memory.high is breached and reclaim is unable to keep up. Throttle
-	 * allocators proactively to slow down excessive growth.
-	 *
 	 * We use overage compared to memory.high to calculate the number of
 	 * jiffies to sleep (penalty_jiffies). Ideally this value should be
 	 * fairly lenient on small overages, and increasingly harsh when the
@@ -2326,24 +2339,9 @@ void mem_cgroup_handle_over_high(void)
 	 * its crazy behaviour, so we exponentially increase the delay based on
 	 * overage amount.
 	 */
-
-	usage = page_counter_read(&memcg->memory);
-	high = READ_ONCE(memcg->high);
-
-	if (usage <= high)
-		goto out;
-
-	/*
-	 * Prevent division by 0 in overage calculation by acting as if it was a
-	 * threshold of 1 page
-	 */
-	clamped_high = max(high, 1UL);
-
-	overage = div64_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
-			  clamped_high);
-
-	penalty_jiffies = ((u64)overage * overage * HZ)
-		>> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);
+	penalty_jiffies = max_overage * max_overage * HZ;
+	penalty_jiffies >>= MEMCG_DELAY_PRECISION_SHIFT;
+	penalty_jiffies >>= MEMCG_DELAY_SCALING_SHIFT;
 
 	/*
 	 * Factor in the task's own contribution to the overage, such that four
@@ -2360,7 +2358,32 @@ void mem_cgroup_handle_over_high(void)
 	 * application moving forwards and also permit diagnostics, albeit
 	 * extremely slowly.
 	 */
-	penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+	return min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+}
+
+/*
+ * Scheduled by try_charge() to be executed from the userland return path
+ * and reclaims memory over the high limit.
+ */
+void mem_cgroup_handle_over_high(void)
+{
+	unsigned long penalty_jiffies;
+	unsigned long pflags;
+	unsigned int nr_pages = current->memcg_nr_pages_over_high;
+	struct mem_cgroup *memcg;
+
+	if (likely(!nr_pages))
+		return;
+
+	memcg = get_mem_cgroup_from_mm(current->mm);
+	reclaim_high(memcg, nr_pages, GFP_KERNEL);
+	current->memcg_nr_pages_over_high = 0;
+
+	/*
+	 * memory.high is breached and reclaim is unable to keep up. Throttle
+	 * allocators proactively to slow down excessive growth.
+	 */
+	penalty_jiffies = calculate_high_delay(memcg, nr_pages);
 
 	/*
 	 * Don't sleep if the amount of jiffies this memcg owes us is so low
_

  parent reply index

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-22  1:19 incoming Andrew Morton
2020-03-22  1:22 ` [patch 01/10] memcg: fix NULL pointer dereference in __mem_cgroup_usage_unregister_event Andrew Morton
2020-03-22  1:22 ` [patch 02/10] mm/hotplug: fix hot remove failure in SPARSEMEM|!VMEMMAP case Andrew Morton
2020-03-22  1:22 ` [patch 03/10] page-flags: fix a crash at SetPageError(THP_SWAP) Andrew Morton
2020-03-22  1:22 ` [patch 04/10] mm, memcg: fix corruption on 64-bit divisor in memory.high throttling Andrew Morton
2020-03-22  1:22 ` Andrew Morton [this message]
2020-03-22  1:22 ` [patch 06/10] mm: do not allow MADV_PAGEOUT for CoW pages Andrew Morton
2020-03-22  1:22 ` [patch 07/10] epoll: fix possible lost wakeup on epoll_ctl() path Andrew Morton
2020-03-22  1:22 ` [patch 08/10] mm/mmu_notifier: silence PROVE_RCU_LIST warnings Andrew Morton
2020-03-22  1:22 ` [patch 09/10] mm, slub: prevent kmalloc_node crashes and memory leaks Andrew Morton
2020-03-22  1:22 ` [patch 10/10] x86/mm: split vmalloc_sync_all() Andrew Morton
2020-03-22  1:39 ` + tools-testing-selftests-vm-mlock2-tests-fix-mlock2-false-negative-errors.patch added to -mm tree Andrew Morton
2020-03-22  4:39 ` + libfs-fix-infoleak-in-simple_attr_read.patch " Andrew Morton
2020-03-22  4:41 ` + bus-mhi-fix-printk-format-for-size_t.patch " Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200322012223.ExM_gA7dC%akpm@linux-foundation.org \
    --to=akpm@linux-foundation.org \
    --cc=chris@chrisdown.name \
    --cc=guro@fb.com \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=mm-commits@vger.kernel.org \
    --cc=natechancellor@gmail.com \
    --cc=stable@vger.kernel.org \
    --cc=tj@kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

mm-commits Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/mm-commits/0 mm-commits/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 mm-commits mm-commits/ https://lore.kernel.org/mm-commits \
		mm-commits@vger.kernel.org
	public-inbox-index mm-commits

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.mm-commits


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git