linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Tim Chen <tim.c.chen@linux.intel.com>
To: Michal Hocko <mhocko@suse.cz>
Cc: Tim Chen <tim.c.chen@linux.intel.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Dave Hansen <dave.hansen@intel.com>,
	Ying Huang <ying.huang@intel.com>,
	Dan Williams <dan.j.williams@intel.com>,
	David Rientjes <rientjes@google.com>,
	Shakeel Butt <shakeelb@google.com>,
	linux-mm@kvack.org, cgroups@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: [RFC PATCH v1 07/11] mm: Account the total top tier memory in use
Date: Mon,  5 Apr 2021 10:08:31 -0700	[thread overview]
Message-ID: <9170c90b0f58dee05a2b2c1d3789d674df42ed65.1617642417.git.tim.c.chen@linux.intel.com> (raw)
In-Reply-To: <cover.1617642417.git.tim.c.chen@linux.intel.com>

Track the global top tier memory usage stats. They are used as the basis of
deciding when to start demoting pages from memory cgroups that have exceeded
their soft limit.  We start reclaiming top tier memory when the total
top tier memory is low.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---
 include/linux/vmstat.h | 18 ++++++++++++++++++
 mm/vmstat.c            | 20 +++++++++++++++++---
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index e1a4fa9abb3a..a3ad5a937fd8 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -139,6 +139,7 @@ static inline void vm_events_fold_cpu(int cpu)
  * Zone and node-based page accounting with per cpu differentials.
  */
 extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS];
+extern atomic_long_t vm_toptier_zone_stat[NR_VM_ZONE_STAT_ITEMS];
 extern atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
 extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS];
 
@@ -175,6 +176,8 @@ static inline void zone_page_state_add(long x, struct zone *zone,
 {
 	atomic_long_add(x, &zone->vm_stat[item]);
 	atomic_long_add(x, &vm_zone_stat[item]);
+	if (node_state(zone->zone_pgdat->node_id, N_TOPTIER))
+		atomic_long_add(x, &vm_toptier_zone_stat[item]);
 }
 
 static inline void node_page_state_add(long x, struct pglist_data *pgdat,
@@ -212,6 +215,17 @@ static inline unsigned long global_node_page_state(enum node_stat_item item)
 	return global_node_page_state_pages(item);
 }
 
+static inline unsigned long global_toptier_zone_page_state(enum zone_stat_item item)
+{
+	long x = atomic_long_read(&vm_toptier_zone_stat[item]);
+
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
 static inline unsigned long zone_page_state(struct zone *zone,
 					enum zone_stat_item item)
 {
@@ -325,6 +339,8 @@ static inline void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
 	atomic_long_inc(&zone->vm_stat[item]);
 	atomic_long_inc(&vm_zone_stat[item]);
+	if (node_state(zone->zone_pgdat->node_id, N_TOPTIER))
+		atomic_long_inc(&vm_toptier_zone_stat[item]);
 }
 
 static inline void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -337,6 +353,8 @@ static inline void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 {
 	atomic_long_dec(&zone->vm_stat[item]);
 	atomic_long_dec(&vm_zone_stat[item]);
+	if (node_state(zone->zone_pgdat->node_id, N_TOPTIER))
+		atomic_long_dec(&vm_toptier_zone_stat[item]);
 }
 
 static inline void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f299d2e89acb..b59efbcaef4e 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -161,9 +161,11 @@ void vm_events_fold_cpu(int cpu)
  * vm_stat contains the global counters
  */
 atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
+atomic_long_t vm_toptier_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
 atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
 atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
 EXPORT_SYMBOL(vm_zone_stat);
+EXPORT_SYMBOL(vm_toptier_zone_stat);
 EXPORT_SYMBOL(vm_numa_stat);
 EXPORT_SYMBOL(vm_node_stat);
 
@@ -695,7 +697,7 @@ EXPORT_SYMBOL(dec_node_page_state);
  * Returns the number of counters updated.
  */
 #ifdef CONFIG_NUMA
-static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
+static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff, int *toptier_diff)
 {
 	int i;
 	int changes = 0;
@@ -717,6 +719,11 @@ static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
 			atomic_long_add(node_diff[i], &vm_node_stat[i]);
 			changes++;
 	}
+
+	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+		if (toptier_diff[i]) {
+			atomic_long_add(toptier_diff[i], &vm_toptier_zone_stat[i]);
+	}
 	return changes;
 }
 #else
@@ -762,6 +769,7 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
 	struct zone *zone;
 	int i;
 	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+	int global_toptier_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
 #ifdef CONFIG_NUMA
 	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
 #endif
@@ -779,6 +787,9 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
 
 				atomic_long_add(v, &zone->vm_stat[i]);
 				global_zone_diff[i] += v;
+				if (node_state(zone->zone_pgdat->node_id, N_TOPTIER)) {
+					global_toptier_diff[i] +=v;
+				}
 #ifdef CONFIG_NUMA
 				/* 3 seconds idle till flush */
 				__this_cpu_write(p->expire, 3);
@@ -846,7 +857,7 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
 
 #ifdef CONFIG_NUMA
 	changes += fold_diff(global_zone_diff, global_numa_diff,
-			     global_node_diff);
+			     global_node_diff, global_toptier_diff);
 #else
 	changes += fold_diff(global_zone_diff, global_node_diff);
 #endif
@@ -868,6 +879,7 @@ void cpu_vm_stats_fold(int cpu)
 	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
 #endif
 	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
+	int global_toptier_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
 
 	for_each_populated_zone(zone) {
 		struct per_cpu_pageset *p;
@@ -910,11 +922,13 @@ void cpu_vm_stats_fold(int cpu)
 				p->vm_node_stat_diff[i] = 0;
 				atomic_long_add(v, &pgdat->vm_stat[i]);
 				global_node_diff[i] += v;
+				if (node_state(pgdat->node_id, N_TOPTIER))
+					global_toptier_diff[i] +=v;
 			}
 	}
 
 #ifdef CONFIG_NUMA
-	fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
+	fold_diff(global_zone_diff, global_numa_diff, global_node_diff, global_toptier_diff);
 #else
 	fold_diff(global_zone_diff, global_node_diff);
 #endif
-- 
2.20.1


  parent reply	other threads:[~2021-04-05 18:09 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-05 17:08 [RFC PATCH v1 00/11] Manage the top tier memory in a tiered memory Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 01/11] mm: Define top tier memory node mask Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 02/11] mm: Add soft memory limit for mem cgroup Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 03/11] mm: Account the top tier memory usage per cgroup Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 04/11] mm: Report top tier memory usage in sysfs Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 05/11] mm: Add soft_limit_top_tier tree for mem cgroup Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 06/11] mm: Handle top tier memory in cgroup soft limit memory tree utilities Tim Chen
2021-04-05 17:08 ` Tim Chen [this message]
2021-04-05 17:08 ` [RFC PATCH v1 08/11] mm: Add toptier option for mem_cgroup_soft_limit_reclaim() Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 09/11] mm: Use kswapd to demote pages when toptier memory is tight Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 10/11] mm: Set toptier_scale_factor via sysctl Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 11/11] mm: Wakeup kswapd if toptier memory need soft reclaim Tim Chen
2021-04-06  9:08 ` [RFC PATCH v1 00/11] Manage the top tier memory in a tiered memory Michal Hocko
2021-04-07 22:33   ` Tim Chen
2021-04-08 11:52     ` Michal Hocko
2021-04-09 23:26       ` Tim Chen
2021-04-12 19:20         ` Shakeel Butt
2021-04-14  8:59           ` Jonathan Cameron
2021-04-15  0:42           ` Tim Chen
2021-04-13  2:15         ` Huang, Ying
2021-04-13  8:33         ` Michal Hocko
2021-04-12 14:03       ` Shakeel Butt
2021-04-08 17:18 ` Shakeel Butt
2021-04-08 18:00   ` Yang Shi
2021-04-08 20:29     ` Shakeel Butt
2021-04-08 20:50       ` Yang Shi
2021-04-12 14:03         ` Shakeel Butt
2021-04-09  7:24       ` Michal Hocko
2021-04-15 22:31         ` Tim Chen
2021-04-16  6:38           ` Michal Hocko
2021-04-14 23:22       ` Tim Chen
2021-04-09  2:58     ` Huang, Ying
2021-04-09 20:50       ` Yang Shi
2021-04-15 22:25   ` Tim Chen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9170c90b0f58dee05a2b2c1d3789d674df42ed65.1617642417.git.tim.c.chen@linux.intel.com \
    --to=tim.c.chen@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=cgroups@vger.kernel.org \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.cz \
    --cc=rientjes@google.com \
    --cc=shakeelb@google.com \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).