All of lore.kernel.org
 help / color / mirror / Atom feed
From: Feng Tang <feng.tang@intel.com>
To: Michal Hocko <mhocko@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	David Rientjes <rientjes@google.com>,
	Mel Gorman <mgorman@techsingularity.net>,
	Vlastimil Babka <vbabka@suse.cz>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] mm/page_alloc: detect allocation forbidden by cpuset and bail out early
Date: Fri, 10 Sep 2021 17:21:32 +0800	[thread overview]
Message-ID: <20210910092132.GA54659@shbuild999.sh.intel.com> (raw)
In-Reply-To: <YTsYxbMhGIunUPZr@dhcp22.suse.cz>

On Fri, Sep 10, 2021 at 10:35:17AM +0200, Michal Hocko wrote:
[...]
> >  
> > +static inline bool cpusets_abnormal_check_needed(void)
> 
> I would go with cpusets_insane_config with a comment explaining what
> that means. I would also do a pr_info() when the static branch is
> enabled.
> 
> [...]
> 
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index 4e455fa..5728675 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -4919,7 +4919,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> >  	 * any suitable zone to satisfy the request - e.g. non-movable
> >  	 * GFP_HIGHUSER allocations from MOVABLE nodes only.
> >  	 */
> > -	if (cpusets_enabled() && (gfp_mask & __GFP_HARDWALL)) {
> > +	if (cpusets_enabled() &&
> > +		cpusets_abnormal_check_needed() &&
> 
> You do not need cpusets_enabled check here. Remember the primary point
> is to not introduce any branch unless a dubious configuration is in
> place.

Thanks for the review, patch updated below. Also should we combine
this one with the original detection patch?

Thanks,
Feng

---
 include/linux/cpuset.h | 13 +++++++++++++
 include/linux/mmzone.h | 14 ++++++++++++++
 kernel/cgroup/cpuset.c | 13 +++++++++++++
 mm/page_alloc.c        |  2 +-
 4 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index d2b9c41..95bacec 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -34,6 +34,8 @@
  */
 extern struct static_key_false cpusets_pre_enable_key;
 extern struct static_key_false cpusets_enabled_key;
+extern struct static_key_false cpusets_insane_config_key;
+
 static inline bool cpusets_enabled(void)
 {
 	return static_branch_unlikely(&cpusets_enabled_key);
@@ -51,6 +53,17 @@ static inline void cpuset_dec(void)
 	static_branch_dec_cpuslocked(&cpusets_pre_enable_key);
 }
 
+/*
+ * Check if there has been insane configurations. E.g. there was usages
+ * which binds a docker OS to memory nodes with only movable zones, which
+ * causes system to behave abnormally, as the usage triggers many innocent
+ * processes get oom-killed.
+ */
+static inline bool cpusets_insane_config(void)
+{
+	return static_branch_unlikely(&cpusets_insane_config_key);
+}
+
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
 extern void cpuset_force_rebuild(void);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6a1d79d..c3f5527 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1116,6 +1116,20 @@ extern struct zone *next_zone(struct zone *zone);
 			; /* do nothing */		\
 		else
 
+/* Whether the 'nodes' are all movable nodes */
+static inline bool movable_only_nodes(nodemask_t *nodes)
+{
+	struct zone *zone;
+
+	for_each_populated_zone(zone) {
+		if (zone_idx(zone) != ZONE_MOVABLE &&
+			node_isset(zone_to_nid(zone), *nodes))
+			return false;
+	}
+
+	return true;
+}
+
 static inline struct zone *zonelist_zone(struct zoneref *zoneref)
 {
 	return zoneref->zone;
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index df1ccf4..e0cb12e 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -69,6 +69,13 @@
 DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);
 DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
 
+/*
+ * There could be abnormal cpuset configurations for cpu or memory
+ * node binding, add this key to provide a quick low-cost judgement
+ * of the situation.
+ */
+DEFINE_STATIC_KEY_FALSE(cpusets_insane_config_key);
+
 /* See "Frequency meter" comments, below. */
 
 struct fmeter {
@@ -1868,6 +1875,12 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 	if (retval < 0)
 		goto done;
 
+	if (movable_only_nodes(&trialcs->mems_allowed)) {
+		static_branch_enable(&cpusets_insane_config_key);
+		pr_info("cpuset: See abornal binding to movable nodes only(nmask=%*pbl)\n",
+			nodemask_pr_args(&trialcs->mems_allowed));
+	}
+
 	spin_lock_irq(&callback_lock);
 	cs->mems_allowed = trialcs->mems_allowed;
 	spin_unlock_irq(&callback_lock);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4e455fa..a7e0854 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4919,7 +4919,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 * any suitable zone to satisfy the request - e.g. non-movable
 	 * GFP_HIGHUSER allocations from MOVABLE nodes only.
 	 */
-	if (cpusets_enabled() && (gfp_mask & __GFP_HARDWALL)) {
+	if (cpusets_insane_config() && (gfp_mask & __GFP_HARDWALL)) {
 		struct zoneref *z = first_zones_zonelist(ac->zonelist,
 					ac->highest_zoneidx,
 					&cpuset_current_mems_allowed);
-- 
2.7.4


  reply	other threads:[~2021-09-10  9:21 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-07  8:25 [PATCH] mm/page_alloc: detect allocation forbidden by cpuset and bail out early Feng Tang
2021-09-07  8:44 ` Michal Hocko
2021-09-08  1:50   ` Feng Tang
2021-09-08  7:06     ` Michal Hocko
2021-09-08  8:12       ` Feng Tang
2021-09-10  7:44       ` Feng Tang
2021-09-10  8:35         ` Michal Hocko
2021-09-10  9:21           ` Feng Tang [this message]
2021-09-10 10:35             ` Michal Hocko
2021-09-10 11:29               ` Feng Tang
2021-09-10 11:43                 ` Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210910092132.GA54659@shbuild999.sh.intel.com \
    --to=feng.tang@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@techsingularity.net \
    --cc=mhocko@suse.com \
    --cc=rientjes@google.com \
    --cc=vbabka@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.