All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yafang Shao <laoar.shao@gmail.com>
To: akpm@linux-foundation.org, hannes@cmpxchg.org, mhocko@kernel.org,
	guro@fb.com, chris@chrisdown.name
Cc: linux-mm@kvack.org, Yafang Shao <laoar.shao@gmail.com>
Subject: [PATCH 3/3] mm: improvements on memcg protection functions
Date: Sat, 25 Apr 2020 11:24:18 -0400	[thread overview]
Message-ID: <20200425152418.28388-4-laoar.shao@gmail.com> (raw)
In-Reply-To: <20200425152418.28388-1-laoar.shao@gmail.com>

Since proportional memory.{min, low} reclaim is introduced in
commit 9783aa9917f8 ("mm, memcg: proportional memory.{low,min} reclaim"),
it have been proved that the proportional reclaim is hard to understand and
the issues caused by it is harder to understand.[1]. That dilemma faced by
us is caused by that the proportional reclaim mixed up memcg and the
reclaim context.

In proportional reclaim, the whole reclaim context - includes the memcg
to be reclaimed and the reclaimer, should be considered, rather than
memcg only.

To make it clear, a new member 'protection' is introduced in the reclaim
context (struct shrink_control) to replace mem_cgroup_protection(). This
one is set when we check whether the memcg is protected or not.

After this change, the issue pointed by me[1] - a really old left-over
value can slow donw target reclaim - can be fixed, and I think it could
also avoid some potential race.

[1]. https://lore.kernel.org/linux-mm/20200423061629.24185-1-laoar.shao@gmail.com

Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: Chris Down <chris@chrisdown.name>
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/linux/memcontrol.h | 25 ----------------
 mm/internal.h              | 17 +++++++++++
 mm/memcontrol.c            | 58 +++++++++++++++++++++++++++-----------
 mm/vmscan.c                | 35 +++--------------------
 4 files changed, 63 insertions(+), 72 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b327857a1e7e..9d5ceeba3b31 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -50,12 +50,6 @@ enum memcg_memory_event {
 	MEMCG_NR_MEMORY_EVENTS,
 };
 
-enum mem_cgroup_protection {
-	MEMCG_PROT_NONE,
-	MEMCG_PROT_LOW,
-	MEMCG_PROT_MIN,
-};
-
 struct mem_cgroup_reclaim_cookie {
 	pg_data_t *pgdat;
 	unsigned int generation;
@@ -344,19 +338,6 @@ static inline bool mem_cgroup_disabled(void)
 	return !cgroup_subsys_enabled(memory_cgrp_subsys);
 }
 
-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
-						  bool in_low_reclaim)
-{
-	if (mem_cgroup_disabled())
-		return 0;
-
-	if (in_low_reclaim)
-		return READ_ONCE(memcg->memory.emin);
-
-	return max(READ_ONCE(memcg->memory.emin),
-		   READ_ONCE(memcg->memory.elow));
-}
-
 int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
 			  bool compound);
@@ -832,12 +813,6 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
 {
 }
 
-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
-						  bool in_low_reclaim)
-{
-	return 0;
-}
-
 static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask,
 					struct mem_cgroup **memcgp,
diff --git a/mm/internal.h b/mm/internal.h
index a0b3bdd933b9..10c762a79c0c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -271,6 +271,9 @@ struct scan_control {
 	 */
 	struct mem_cgroup *target_mem_cgroup;
 
+	/* Memcg protection in this reclaim context */
+	unsigned long protection;
+
 	/* Can active pages be deactivated as part of reclaim? */
 #define DEACTIVATE_ANON 1
 #define DEACTIVATE_FILE 2
@@ -338,6 +341,20 @@ struct scan_control {
 	struct reclaim_state reclaim_state;
 };
 
+#ifdef CONFIG_MEMCG
+bool mem_cgroup_protected(struct mem_cgroup *target,
+			  struct mem_cgroup *memcg,
+			  struct scan_control *sc);
+
+#else
+static inline bool mem_cgroup_protected(struct mem_cgroup *target,
+					struct mem_cgroup *memcg,
+					struct scan_control *sc)
+{
+	return false;
+}
+#endif
+
 /*
  * This function returns the order of a free page in the buddy system. In
  * general, page_zone(page)->lock must be held by the caller to prevent the
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 51dab7f2e714..f2f191898f2b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6372,35 +6372,30 @@ static unsigned long effective_protection(unsigned long usage,
  * WARNING: This function is not stateless! It can only be used as part
  *          of a top-down tree iteration, not for isolated queries.
  *
- * Returns one of the following:
- *   MEMCG_PROT_NONE: cgroup memory is not protected
- *   MEMCG_PROT_LOW: cgroup memory is protected as long there is
- *     an unprotected supply of reclaimable memory from other cgroups.
- *   MEMCG_PROT_MIN: cgroup memory is protected
  */
-enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *target,
-						struct mem_cgroup *memcg,
-						struct scan_control *sc)
+bool mem_cgroup_protected(struct mem_cgroup *target,
+			  struct mem_cgroup *memcg,
+			  struct scan_control *sc)
 {
 	unsigned long usage, parent_usage;
 	struct mem_cgroup *parent;
 
 	if (mem_cgroup_disabled())
-		return MEMCG_PROT_NONE;
+		return false;
 
 	if (!target)
 		target = root_mem_cgroup;
 	if (memcg == target)
-		return MEMCG_PROT_NONE;
+		return false;
 
 	usage = page_counter_read(&memcg->memory);
 	if (!usage)
-		return MEMCG_PROT_NONE;
+		return false;
 
 	parent = parent_mem_cgroup(memcg);
 	/* No parent means a non-hierarchical mode on v1 memcg */
 	if (!parent)
-		return MEMCG_PROT_NONE;
+		return false;
 
 	if (parent == target) {
 		memcg->memory.emin = READ_ONCE(memcg->memory.min);
@@ -6420,12 +6415,43 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *target,
 			atomic_long_read(&parent->memory.children_low_usage)));
 
 out:
+	/*
+	 * Hard protection.
+	 * If there is no reclaimable memory, OOM.
+	 */
 	if (usage <= memcg->memory.emin)
-		return MEMCG_PROT_MIN;
-	else if (usage <= memcg->memory.elow)
-		return MEMCG_PROT_LOW;
+		return true;
+
+	/* The protection takes effect when false is returned. */
+	if (sc->memcg_low_reclaim)
+		sc->protection = memcg->memory.emin;
 	else
-		return MEMCG_PROT_NONE;
+		sc->protection = max(memcg->memory.emin, memcg->memory.elow);
+
+	if (usage <= memcg->memory.elow) {
+		/*
+		 * Soft protection.
+		 * Respect the protection only as long as there is an
+		 * unprotected supply of reclaimable memory from other
+		 * cgroups.
+		 */
+		if (!sc->memcg_low_reclaim) {
+			sc->memcg_low_skipped = 1;
+			return true;
+		}
+
+		memcg_memory_event(memcg, MEMCG_LOW);
+
+		return false;
+	}
+
+	/*
+	 * All protection thresholds breached. We may still choose to vary
+	 * the scan pressure applied based on by how much the cgroup in
+	 * question has exceeded its protection thresholds
+	 * (see get_scan_count).
+	 */
+	return false;
 }
 
 /**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 61c944e7f587..a81bf736ac11 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2263,8 +2263,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 		unsigned long protection;
 
 		lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
-		protection = mem_cgroup_protection(memcg,
-						   sc->memcg_low_reclaim);
+		protection = sc->protection;
 
 		if (protection) {
 			/*
@@ -2551,36 +2550,10 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 		unsigned long reclaimed;
 		unsigned long scanned;
 
-		switch (mem_cgroup_protected(target_memcg, memcg, sc)) {
-		case MEMCG_PROT_MIN:
-			/*
-			 * Hard protection.
-			 * If there is no reclaimable memory, OOM.
-			 */
+		sc->protection = 0;
+
+		if (mem_cgroup_protected(target_memcg, memcg, sc))
 			continue;
-		case MEMCG_PROT_LOW:
-			/*
-			 * Soft protection.
-			 * Respect the protection only as long as
-			 * there is an unprotected supply
-			 * of reclaimable memory from other cgroups.
-			 */
-			if (!sc->memcg_low_reclaim) {
-				sc->memcg_low_skipped = 1;
-				continue;
-			}
-			memcg_memory_event(memcg, MEMCG_LOW);
-			break;
-		case MEMCG_PROT_NONE:
-			/*
-			 * All protection thresholds breached. We may
-			 * still choose to vary the scan pressure
-			 * applied based on by how much the cgroup in
-			 * question has exceeded its protection
-			 * thresholds (see get_scan_count).
-			 */
-			break;
-		}
 
 		reclaimed = sc->nr_reclaimed;
 		scanned = sc->nr_scanned;
-- 
2.18.2



  parent reply	other threads:[~2020-04-25 15:24 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-25 15:24 [PATCH 0/3] mm: improve proportional memcg protection Yafang Shao
2020-04-25 15:24 ` [PATCH 1/3] mm: move struct scan_control into internal.h Yafang Shao
2020-04-25 15:24 ` [PATCH 2/3] mm: add reclaim context as a new parameter in mem_cgroup_protected() Yafang Shao
2020-04-25 15:24 ` Yafang Shao [this message]
2020-04-27  9:40   ` [PATCH 3/3] mm: improvements on memcg protection functions Michal Hocko
2020-04-27 10:09     ` Yafang Shao
2020-04-27 10:50       ` Michal Hocko
2020-04-27 11:06         ` Yafang Shao
2020-04-27 11:24           ` Michal Hocko
2020-04-27 11:32             ` Yafang Shao
2020-04-27 17:05 ` [PATCH 0/3] mm: improve proportional memcg protection Johannes Weiner
2020-04-28  1:45   ` Yafang Shao
2020-04-28  3:37     ` Johannes Weiner
2020-04-28  6:00       ` Yafang Shao
2020-04-28  8:05     ` Michal Hocko
2020-04-28  8:22       ` Yafang Shao
2020-04-28 10:43         ` Michal Hocko
2020-04-28 12:25           ` Yafang Shao
2020-04-28 12:42             ` Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200425152418.28388-4-laoar.shao@gmail.com \
    --to=laoar.shao@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=chris@chrisdown.name \
    --cc=guro@fb.com \
    --cc=hannes@cmpxchg.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.