* [PATCH v2 07/12] mm: memcontrol: convert kernel stack account to bytes
From: Muchun Song @ 2020-12-06 8:29 UTC
To: gregkh, rafael, adobriyan, akpm, hannes, mhocko, vdavydov.dev,
hughd, will, guro, rppt, tglx, esyr, peterx, krisman, surenb,
avagin, elver, rdunlap, iamjoonsoo.kim
Cc: linux-kernel, linux-fsdevel, linux-mm, cgroups, Muchun Song
The kernel stack accounting is currently done in KiB. Convert it from
KiB to bytes.
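As a minimal user-space sketch of the arithmetic this change relies on
(the 16 KiB THREAD_SIZE is an assumption for a common configuration,
not taken from this patch):

/*
 * Illustrative only: the counter now holds bytes; readers divide
 * by SZ_1K once, at display time.
 */
#include <stdio.h>

#define THREAD_SIZE	(16UL * 1024)	/* assumed 16 KiB stacks */
#define SZ_1K		1024UL

static unsigned long nr_kernel_stack_b;	/* was NR_KERNEL_STACK_KB */

static void account_stack(int account)	/* +1 on fork, -1 on exit */
{
	/* was: account * (THREAD_SIZE / 1024) */
	nr_kernel_stack_b += account * THREAD_SIZE;
}

int main(void)
{
	account_stack(1);
	account_stack(1);
	printf("KernelStack: %8lu kB\n", nr_kernel_stack_b / SZ_1K);
	return 0;	/* prints "KernelStack:       32 kB" */
}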
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
drivers/base/node.c | 2 +-
fs/proc/meminfo.c | 2 +-
include/linux/mmzone.h | 2 +-
kernel/fork.c | 8 ++++----
mm/memcontrol.c | 2 +-
mm/page_alloc.c | 2 +-
6 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/drivers/base/node.c b/drivers/base/node.c
index f77652e6339f..92a75bad35c9 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -446,7 +446,7 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
- nid, node_page_state(pgdat, NR_KERNEL_STACK_KB),
+ nid, node_page_state(pgdat, NR_KERNEL_STACK_B) / SZ_1K,
#ifdef CONFIG_SHADOW_CALL_STACK
nid, node_page_state(pgdat, NR_KERNEL_SCS_KB),
#endif
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 5a83012d8b72..799a537d4218 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -101,7 +101,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SReclaimable: ", sreclaimable);
show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
- global_node_page_state(NR_KERNEL_STACK_KB));
+ global_node_page_state(NR_KERNEL_STACK_B) / SZ_1K);
#ifdef CONFIG_SHADOW_CALL_STACK
seq_printf(m, "ShadowCallStack:%8lu kB\n",
global_node_page_state(NR_KERNEL_SCS_KB));
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 15132adaa233..bd34416293ec 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -202,7 +202,7 @@ enum node_stat_item {
NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */
NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */
NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */
- NR_KERNEL_STACK_KB, /* measured in KiB */
+ NR_KERNEL_STACK_B, /* measured in bytes */
#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
NR_KERNEL_SCS_KB, /* measured in KiB */
#endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 345f378e104d..2913d7c43dcb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -382,11 +382,11 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
/* All stack pages are in the same node. */
if (vm)
- mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB,
- account * (THREAD_SIZE / 1024));
+ mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_B,
+ account * THREAD_SIZE);
else
- mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB,
- account * (THREAD_SIZE / 1024));
+ mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_B,
+ account * THREAD_SIZE);
}
static int memcg_charge_kernel_stack(struct task_struct *tsk)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6d4365d2fd1c..48d70c1ad301 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1500,7 +1500,7 @@ struct memory_stat {
static const struct memory_stat memory_stats[] = {
{ "anon", PAGE_SIZE, NR_ANON_MAPPED },
{ "file", PAGE_SIZE, NR_FILE_PAGES },
- { "kernel_stack", 1024, NR_KERNEL_STACK_KB },
+ { "kernel_stack", 1, NR_KERNEL_STACK_B },
{ "percpu", 1, MEMCG_PERCPU_B },
{ "sock", PAGE_SIZE, MEMCG_SOCK },
{ "shmem", PAGE_SIZE, NR_SHMEM },
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d103513b3e4f..d2821ba7f682 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5572,7 +5572,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(node_page_state(pgdat, NR_ANON_THPS)),
#endif
K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
- node_page_state(pgdat, NR_KERNEL_STACK_KB),
+ node_page_state(pgdat, NR_KERNEL_STACK_B) / SZ_1K,
#ifdef CONFIG_SHADOW_CALL_STACK
node_page_state(pgdat, NR_KERNEL_SCS_KB),
#endif
--
2.11.0
* [PATCH v2 08/12] mm: memcontrol: convert NR_KERNEL_SCS_KB account to bytes
From: Muchun Song @ 2020-12-06 8:29 UTC
To: gregkh, rafael, adobriyan, akpm, hannes, mhocko, vdavydov.dev,
hughd, will, guro, rppt, tglx, esyr, peterx, krisman, surenb,
avagin, elver, rdunlap, iamjoonsoo.kim
Cc: linux-kernel, linux-fsdevel, linux-mm, cgroups, Muchun Song
Convert the NR_KERNEL_SCS_KB accounting from KiB to bytes, matching
the kernel stack counter converted in the previous patch.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
drivers/base/node.c | 2 +-
fs/proc/meminfo.c | 2 +-
include/linux/mmzone.h | 2 +-
kernel/scs.c | 4 ++--
mm/page_alloc.c | 2 +-
5 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 92a75bad35c9..bc01ce0b2fcd 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -448,7 +448,7 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(i.sharedram),
nid, node_page_state(pgdat, NR_KERNEL_STACK_B) / SZ_1K,
#ifdef CONFIG_SHADOW_CALL_STACK
- nid, node_page_state(pgdat, NR_KERNEL_SCS_KB),
+ nid, node_page_state(pgdat, NR_KERNEL_SCS_B) / SZ_1K,
#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, 0UL,
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 799a537d4218..69895e83d4fc 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -104,7 +104,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
global_node_page_state(NR_KERNEL_STACK_B) / SZ_1K);
#ifdef CONFIG_SHADOW_CALL_STACK
seq_printf(m, "ShadowCallStack:%8lu kB\n",
- global_node_page_state(NR_KERNEL_SCS_KB));
+ global_node_page_state(NR_KERNEL_SCS_B) / SZ_1K);
#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bd34416293ec..1f9c83778629 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -204,7 +204,7 @@ enum node_stat_item {
NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */
NR_KERNEL_STACK_B, /* measured in bytes */
#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
- NR_KERNEL_SCS_KB, /* measured in KiB */
+ NR_KERNEL_SCS_B, /* measured in bytes */
#endif
NR_VM_NODE_STAT_ITEMS
};
diff --git a/kernel/scs.c b/kernel/scs.c
index 4ff4a7ba0094..8db89c932ddc 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -17,8 +17,8 @@ static void __scs_account(void *s, int account)
{
struct page *scs_page = virt_to_page(s);
- mod_node_page_state(page_pgdat(scs_page), NR_KERNEL_SCS_KB,
- account * (SCS_SIZE / SZ_1K));
+ mod_node_page_state(page_pgdat(scs_page), NR_KERNEL_SCS_B,
+ account * SCS_SIZE);
}
static void *scs_alloc(int node)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d2821ba7f682..58916b3afdab 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5574,7 +5574,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
node_page_state(pgdat, NR_KERNEL_STACK_B) / SZ_1K,
#ifdef CONFIG_SHADOW_CALL_STACK
- node_page_state(pgdat, NR_KERNEL_SCS_KB),
+ node_page_state(pgdat, NR_KERNEL_SCS_B) / SZ_1K,
#endif
pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
"yes" : "no");
--
2.11.0
* [PATCH v2 09/12] mm: memcontrol: convert vmstat slab counters to bytes
From: Muchun Song @ 2020-12-06 8:29 UTC
To: gregkh, rafael, adobriyan, akpm, hannes, mhocko, vdavydov.dev,
hughd, will, guro, rppt, tglx, esyr, peterx, krisman, surenb,
avagin, elver, rdunlap, iamjoonsoo.kim
Cc: linux-kernel, linux-fsdevel, linux-mm, cgroups, Muchun Song
The global and per-node slab counters are stored in pages, while the
memcg and lruvec counters are stored in bytes. This inconsistent
scheme is confusing. So convert all vmstat slab counters to bytes.
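A minimal sketch of the normalization this moves into the *_pages
helpers (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12; not the kernel
sources themselves):

#include <assert.h>

#define PAGE_SHIFT	12

/* Mirrors global_node_page_state_pages(): byte-based items are
 * shifted down so callers always see pages. */
static unsigned long state_in_pages(long x, int item_in_bytes)
{
	if (x < 0)		/* the CONFIG_SMP underflow clamp */
		x = 0;
	if (item_in_bytes)
		x >>= PAGE_SHIFT;
	return x;
}

int main(void)
{
	/* 3 slab pages accounted in bytes read back as 3 pages */
	assert(state_in_pages(3L << PAGE_SHIFT, 1) == 3);
	/* page-based items pass through unchanged */
	assert(state_in_pages(3, 0) == 3);
	return 0;
}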
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
include/linux/vmstat.h | 17 ++++++++++-------
mm/vmstat.c | 21 ++++++++++-----------
2 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 322dcbfcc933..fd1a3d5d4926 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -197,18 +197,26 @@ static inline
unsigned long global_node_page_state_pages(enum node_stat_item item)
{
long x = atomic_long_read(&vm_node_stat[item]);
+
#ifdef CONFIG_SMP
if (x < 0)
x = 0;
#endif
+ if (vmstat_item_in_bytes(item))
+ x >>= PAGE_SHIFT;
return x;
}
static inline unsigned long global_node_page_state(enum node_stat_item item)
{
- VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
+ long x = atomic_long_read(&vm_node_stat[item]);
- return global_node_page_state_pages(item);
+ VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
}
static inline unsigned long zone_page_state(struct zone *zone,
@@ -312,11 +320,6 @@ static inline void __mod_zone_page_state(struct zone *zone,
static inline void __mod_node_page_state(struct pglist_data *pgdat,
enum node_stat_item item, int delta)
{
- if (vmstat_item_in_bytes(item)) {
- VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
- delta >>= PAGE_SHIFT;
- }
-
node_page_state_add(delta, pgdat, item);
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8d77ee426e22..7fb0c7cb9516 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -345,11 +345,6 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
long x;
long t;
- if (vmstat_item_in_bytes(item)) {
- VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
- delta >>= PAGE_SHIFT;
- }
-
x = delta + __this_cpu_read(*p);
t = __this_cpu_read(pcp->stat_threshold);
@@ -554,11 +549,6 @@ static inline void mod_node_state(struct pglist_data *pgdat,
s8 __percpu *p = pcp->vm_node_stat_diff + item;
long o, n, t, z;
- if (vmstat_item_in_bytes(item)) {
- VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
- delta >>= PAGE_SHIFT;
- }
-
do {
z = 0; /* overflow to node counters */
@@ -1012,19 +1002,28 @@ unsigned long node_page_state_pages(struct pglist_data *pgdat,
enum node_stat_item item)
{
long x = atomic_long_read(&pgdat->vm_stat[item]);
+
#ifdef CONFIG_SMP
if (x < 0)
x = 0;
#endif
+ if (vmstat_item_in_bytes(item))
+ x >>= PAGE_SHIFT;
return x;
}
unsigned long node_page_state(struct pglist_data *pgdat,
enum node_stat_item item)
{
+ long x = atomic_long_read(&pgdat->vm_stat[item]);
+
VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
- return node_page_state_pages(pgdat, item);
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
}
#endif
--
2.11.0
* [PATCH v2 10/12] mm: memcontrol: scale stat_threshold for byte-sized vmstat
From: Muchun Song @ 2020-12-06 8:29 UTC
To: gregkh, rafael, adobriyan, akpm, hannes, mhocko, vdavydov.dev,
hughd, will, guro, rppt, tglx, esyr, peterx, krisman, surenb,
avagin, elver, rdunlap, iamjoonsoo.kim
Cc: linux-kernel, linux-fsdevel, linux-mm, cgroups, Muchun Song
Some vmstat counters are now accounted in bytes rather than pages, so
the stat_threshold should also scale to bytes for those items.
The memcg vmstat counters are already of type long (see struct
lruvec_stat). The global per-node per-CPU diffs could likewise become
long, but since the maximum vmstat threshold is 125, even scaled by
PAGE_SHIFT it fits comfortably in an s32.
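A back-of-the-envelope check of that claim (assuming 4 KiB pages; the
125 cap comes from calculate_normal_threshold() in mm/vmstat.c):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12

int main(void)
{
	long t = 125;			/* maximum stat_threshold */
	long t_bytes = t << PAGE_SHIFT;	/* 512000 for byte items */

	printf("scaled threshold: %ld\n", t_bytes);
	printf("fits in s8?  %s\n", t_bytes <= INT8_MAX  ? "yes" : "no");
	printf("fits in s32? %s\n", t_bytes <= INT32_MAX ? "yes" : "no");
	return 0;	/* no for s8, yes for s32 */
}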
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
include/linux/mmzone.h | 17 ++++++-----------
include/linux/vmstat.h | 1 -
mm/vmstat.c | 24 +++++++++++++-----------
3 files changed, 19 insertions(+), 23 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1f9c83778629..d53328551225 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -216,17 +216,12 @@ enum node_stat_item {
*/
static __always_inline bool vmstat_item_in_bytes(int idx)
{
- /*
- * Global and per-node slab counters track slab pages.
- * It's expected that changes are multiples of PAGE_SIZE.
- * Internally values are stored in pages.
- *
- * Per-memcg and per-lruvec counters track memory, consumed
- * by individual slab objects. These counters are actually
- * byte-precise.
- */
return (idx == NR_SLAB_RECLAIMABLE_B ||
- idx == NR_SLAB_UNRECLAIMABLE_B);
+ idx == NR_SLAB_UNRECLAIMABLE_B ||
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ idx == NR_KERNEL_SCS_B ||
+#endif
+ idx == NR_KERNEL_STACK_B);
}
/*
@@ -340,7 +335,7 @@ struct per_cpu_pageset {
struct per_cpu_nodestat {
s8 stat_threshold;
- s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS];
+ s32 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS];
};
#endif /* !__GENERATING_BOUNDS.H */
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index fd1a3d5d4926..afd84dc2398c 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -211,7 +211,6 @@ static inline unsigned long global_node_page_state(enum node_stat_item item)
{
long x = atomic_long_read(&vm_node_stat[item]);
- VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
#ifdef CONFIG_SMP
if (x < 0)
x = 0;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 7fb0c7cb9516..25751b1d8e2e 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -341,13 +341,15 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
long delta)
{
struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
- s8 __percpu *p = pcp->vm_node_stat_diff + item;
+ s32 __percpu *p = pcp->vm_node_stat_diff + item;
long x;
long t;
x = delta + __this_cpu_read(*p);
t = __this_cpu_read(pcp->stat_threshold);
+ if (vmstat_item_in_bytes(item))
+ t <<= PAGE_SHIFT;
if (unlikely(abs(x) > t)) {
node_page_state_add(x, pgdat, item);
@@ -399,15 +401,15 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
- s8 __percpu *p = pcp->vm_node_stat_diff + item;
- s8 v, t;
+ s32 __percpu *p = pcp->vm_node_stat_diff + item;
+ s32 v, t;
VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
v = __this_cpu_inc_return(*p);
t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v > t)) {
- s8 overstep = t >> 1;
+ s32 overstep = t >> 1;
node_page_state_add(v + overstep, pgdat, item);
__this_cpu_write(*p, -overstep);
@@ -445,8 +447,8 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
- s8 __percpu *p = pcp->vm_node_stat_diff + item;
- s8 v, t;
+ s32 __percpu *p = pcp->vm_node_stat_diff + item;
+ s32 v, t;
VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
@@ -546,7 +548,7 @@ static inline void mod_node_state(struct pglist_data *pgdat,
enum node_stat_item item, int delta, int overstep_mode)
{
struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
- s8 __percpu *p = pcp->vm_node_stat_diff + item;
+ s32 __percpu *p = pcp->vm_node_stat_diff + item;
long o, n, t, z;
do {
@@ -563,6 +565,8 @@ static inline void mod_node_state(struct pglist_data *pgdat,
* for all cpus in a node.
*/
t = this_cpu_read(pcp->stat_threshold);
+ if (vmstat_item_in_bytes(item))
+ t <<= PAGE_SHIFT;
o = this_cpu_read(*p);
n = delta + o;
@@ -829,7 +833,7 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
- int v;
+ s32 v;
v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
if (v) {
@@ -899,7 +903,7 @@ void cpu_vm_stats_fold(int cpu)
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
if (p->vm_node_stat_diff[i]) {
- int v;
+ s32 v;
v = p->vm_node_stat_diff[i];
p->vm_node_stat_diff[i] = 0;
@@ -1017,8 +1021,6 @@ unsigned long node_page_state(struct pglist_data *pgdat,
{
long x = atomic_long_read(&pgdat->vm_stat[item]);
- VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
-
#ifdef CONFIG_SMP
if (x < 0)
x = 0;
--
2.11.0
* [PATCH v2 11/12] mm: memcontrol: make the slab calculation consistent
From: Muchun Song @ 2020-12-06 8:29 UTC
To: gregkh, rafael, adobriyan, akpm, hannes, mhocko, vdavydov.dev,
hughd, will, guro, rppt, tglx, esyr, peterx, krisman, surenb,
avagin, elver, rdunlap, iamjoonsoo.kim
Cc: linux-kernel, linux-fsdevel, linux-mm, cgroups, Muchun Song
Although the slab ratio is one, we should read the ratio from the
related memory_stats entry instead of hard-coding it. And the local
variable size already holds the slab_unreclaimable value, so there is
no need to read it again.
The units of the vmstat counters are now either pages or bytes, so we
can drop the ratio field from struct memory_stat. This makes the code
cleaner and simpler, and gets rid of the awkward mix of static and
runtime initialization of the memory_stats table.
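A sketch of the unit selection that replaces the ratio field (the
item names here are simplified stand-ins for the real enums; the real
classification is memcg_stat_item_in_bytes()):

#include <stdio.h>

#define PAGE_SIZE	4096	/* assumed 4 KiB pages */

enum item { ANON, SLAB_RECLAIMABLE_B, WORKINGSET_REFAULT };

/* Mirrors memcg_page_state_unit(): events stay plain counts,
 * byte-based counters stay bytes, everything else is pages. */
static int unit_of(enum item item)
{
	switch (item) {
	case WORKINGSET_REFAULT:
		return 1;		/* event counter */
	case SLAB_RECLAIMABLE_B:
		return 1;		/* already in bytes */
	default:
		return PAGE_SIZE;	/* page-based counter */
	}
}

int main(void)
{
	/* memory.stat prints counter * unit: memory counters come
	 * out in bytes, events as raw counts. */
	printf("anon %d\n", 10 * unit_of(ANON));	/* 40960 */
	printf("refaults %d\n", 7 * unit_of(WORKINGSET_REFAULT));
	return 0;
}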
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
mm/memcontrol.c | 108 ++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 70 insertions(+), 38 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 48d70c1ad301..49fbcf003bf5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1493,48 +1493,71 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
struct memory_stat {
const char *name;
- unsigned int ratio;
unsigned int idx;
};
static const struct memory_stat memory_stats[] = {
- { "anon", PAGE_SIZE, NR_ANON_MAPPED },
- { "file", PAGE_SIZE, NR_FILE_PAGES },
- { "kernel_stack", 1, NR_KERNEL_STACK_B },
- { "percpu", 1, MEMCG_PERCPU_B },
- { "sock", PAGE_SIZE, MEMCG_SOCK },
- { "shmem", PAGE_SIZE, NR_SHMEM },
- { "file_mapped", PAGE_SIZE, NR_FILE_MAPPED },
- { "file_dirty", PAGE_SIZE, NR_FILE_DIRTY },
- { "file_writeback", PAGE_SIZE, NR_WRITEBACK },
+ { "anon", NR_ANON_MAPPED },
+ { "file", NR_FILE_PAGES },
+ { "kernel_stack", NR_KERNEL_STACK_B },
+ { "percpu", MEMCG_PERCPU_B },
+ { "sock", MEMCG_SOCK },
+ { "shmem", NR_SHMEM },
+ { "file_mapped", NR_FILE_MAPPED },
+ { "file_dirty", NR_FILE_DIRTY },
+ { "file_writeback", NR_WRITEBACK },
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- { "anon_thp", PAGE_SIZE, NR_ANON_THPS },
- { "file_thp", PAGE_SIZE, NR_FILE_THPS },
- { "shmem_thp", PAGE_SIZE, NR_SHMEM_THPS },
+ { "anon_thp", NR_ANON_THPS },
+ { "file_thp", NR_FILE_THPS },
+ { "shmem_thp", NR_SHMEM_THPS },
#endif
- { "inactive_anon", PAGE_SIZE, NR_INACTIVE_ANON },
- { "active_anon", PAGE_SIZE, NR_ACTIVE_ANON },
- { "inactive_file", PAGE_SIZE, NR_INACTIVE_FILE },
- { "active_file", PAGE_SIZE, NR_ACTIVE_FILE },
- { "unevictable", PAGE_SIZE, NR_UNEVICTABLE },
-
- /*
- * Note: The slab_reclaimable and slab_unreclaimable must be
- * together and slab_reclaimable must be in front.
- */
- { "slab_reclaimable", 1, NR_SLAB_RECLAIMABLE_B },
- { "slab_unreclaimable", 1, NR_SLAB_UNRECLAIMABLE_B },
+ { "inactive_anon", NR_INACTIVE_ANON },
+ { "active_anon", NR_ACTIVE_ANON },
+ { "inactive_file", NR_INACTIVE_FILE },
+ { "active_file", NR_ACTIVE_FILE },
+ { "unevictable", NR_UNEVICTABLE },
+ { "slab_reclaimable", NR_SLAB_RECLAIMABLE_B },
+ { "slab_unreclaimable", NR_SLAB_UNRECLAIMABLE_B },
/* The memory events */
- { "workingset_refault_anon", 1, WORKINGSET_REFAULT_ANON },
- { "workingset_refault_file", 1, WORKINGSET_REFAULT_FILE },
- { "workingset_activate_anon", 1, WORKINGSET_ACTIVATE_ANON },
- { "workingset_activate_file", 1, WORKINGSET_ACTIVATE_FILE },
- { "workingset_restore_anon", 1, WORKINGSET_RESTORE_ANON },
- { "workingset_restore_file", 1, WORKINGSET_RESTORE_FILE },
- { "workingset_nodereclaim", 1, WORKINGSET_NODERECLAIM },
+ { "workingset_refault_anon", WORKINGSET_REFAULT_ANON },
+ { "workingset_refault_file", WORKINGSET_REFAULT_FILE },
+ { "workingset_activate_anon", WORKINGSET_ACTIVATE_ANON },
+ { "workingset_activate_file", WORKINGSET_ACTIVATE_FILE },
+ { "workingset_restore_anon", WORKINGSET_RESTORE_ANON },
+ { "workingset_restore_file", WORKINGSET_RESTORE_FILE },
+ { "workingset_nodereclaim", WORKINGSET_NODERECLAIM },
};
+/* Translate stat items to the correct unit for memory.stat output */
+static int memcg_page_state_unit(int item)
+{
+ int unit;
+
+ switch (item) {
+ case WORKINGSET_REFAULT_ANON:
+ case WORKINGSET_REFAULT_FILE:
+ case WORKINGSET_ACTIVATE_ANON:
+ case WORKINGSET_ACTIVATE_FILE:
+ case WORKINGSET_RESTORE_ANON:
+ case WORKINGSET_RESTORE_FILE:
+ case WORKINGSET_NODERECLAIM:
+ unit = 1;
+ break;
+ default:
+ unit = memcg_stat_item_in_bytes(item) ? 1 : PAGE_SIZE;
+ break;
+ }
+
+ return unit;
+}
+
+static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg,
+ int item)
+{
+ return memcg_page_state(memcg, item) * memcg_page_state_unit(item);
+}
+
static char *memory_stat_format(struct mem_cgroup *memcg)
{
struct seq_buf s;
@@ -1558,13 +1581,16 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
u64 size;
- size = memcg_page_state(memcg, memory_stats[i].idx);
- size *= memory_stats[i].ratio;
+ size = memcg_page_state_output(memcg, memory_stats[i].idx);
seq_buf_printf(&s, "%s %llu\n", memory_stats[i].name, size);
+ /*
+ * We are printing reclaimable, unreclaimable of the slab
+ * and the sum of both.
+ */
if (unlikely(memory_stats[i].idx == NR_SLAB_UNRECLAIMABLE_B)) {
- size = memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) +
- memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B);
+ size += memcg_page_state_output(memcg,
+ NR_SLAB_RECLAIMABLE_B);
seq_buf_printf(&s, "slab %llu\n", size);
}
}
@@ -6358,6 +6384,12 @@ static int memory_stat_show(struct seq_file *m, void *v)
}
#ifdef CONFIG_NUMA
+static inline unsigned long lruvec_page_state_output(struct lruvec *lruvec,
+ int item)
+{
+ return lruvec_page_state(lruvec, item) * memcg_page_state_unit(item);
+}
+
static int memory_numa_stat_show(struct seq_file *m, void *v)
{
int i;
@@ -6375,8 +6407,8 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
struct lruvec *lruvec;
lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
- size = lruvec_page_state(lruvec, memory_stats[i].idx);
- size *= memory_stats[i].ratio;
+ size = lruvec_page_state_output(lruvec,
+ memory_stats[i].idx);
seq_printf(m, " N%d=%llu", nid, size);
}
seq_putc(m, '\n');
--
2.11.0
* [PATCH v2 12/12] mm: memcontrol: remove {global_}node_page_state_pages
From: Muchun Song @ 2020-12-06 8:29 UTC
To: gregkh, rafael, adobriyan, akpm, hannes, mhocko, vdavydov.dev,
hughd, will, guro, rppt, tglx, esyr, peterx, krisman, surenb,
avagin, elver, rdunlap, iamjoonsoo.kim
Cc: linux-kernel, linux-fsdevel, linux-mm, cgroups, Muchun Song
The units of the vmstat counters are now either pages or bytes, so we
can adjust node_page_state() to always return values in pages and
remove node_page_state_pages() and global_node_page_state_pages().
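The arithmetic that makes this safe, as a small sketch (assuming
4 KiB pages; K() is the usual pages-to-kB helper used by meminfo):

#include <assert.h>

#define PAGE_SHIFT	12
#define K(x)		((x) << (PAGE_SHIFT - 10))	/* pages -> kB */

int main(void)
{
	unsigned long stack_bytes = 8UL << PAGE_SHIFT;

	/* node_page_state() now normalizes byte items to pages... */
	unsigned long pages = stack_bytes >> PAGE_SHIFT;

	/* ...so the existing K() macro yields kB, exactly as the
	 * open-coded "/ SZ_1K" did for page-multiple sizes. */
	assert(K(pages) == stack_bytes / 1024);
	return 0;
}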
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
drivers/base/node.c | 10 +++++-----
fs/proc/meminfo.c | 12 ++++++------
include/linux/vmstat.h | 17 +----------------
kernel/power/snapshot.c | 2 +-
mm/oom_kill.c | 2 +-
mm/page_alloc.c | 10 +++++-----
mm/vmscan.c | 2 +-
mm/vmstat.c | 23 ++++++-----------------
8 files changed, 26 insertions(+), 52 deletions(-)
diff --git a/drivers/base/node.c b/drivers/base/node.c
index bc01ce0b2fcd..42298e3552e5 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -374,8 +374,8 @@ static ssize_t node_read_meminfo(struct device *dev,
unsigned long sreclaimable, sunreclaimable;
si_meminfo_node(&i, nid);
- sreclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B);
- sunreclaimable = node_page_state_pages(pgdat, NR_SLAB_UNRECLAIMABLE_B);
+ sreclaimable = node_page_state(pgdat, NR_SLAB_RECLAIMABLE_B);
+ sunreclaimable = node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE_B);
len = sysfs_emit_at(buf, len,
"Node %d MemTotal: %8lu kB\n"
"Node %d MemFree: %8lu kB\n"
@@ -446,9 +446,9 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
- nid, node_page_state(pgdat, NR_KERNEL_STACK_B) / SZ_1K,
+ nid, K(node_page_state(pgdat, NR_KERNEL_STACK_B)),
#ifdef CONFIG_SHADOW_CALL_STACK
- nid, node_page_state(pgdat, NR_KERNEL_SCS_B) / SZ_1K,
+ nid, K(node_page_state(pgdat, NR_KERNEL_SCS_B)),
#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, 0UL,
@@ -517,7 +517,7 @@ static ssize_t node_read_vmstat(struct device *dev,
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
len += sysfs_emit_at(buf, len, "%s %lu\n",
node_stat_name(i),
- node_page_state_pages(pgdat, i));
+ node_page_state(pgdat, i));
return len;
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 69895e83d4fc..95ea5f062161 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -52,8 +52,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
available = si_mem_available();
- sreclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B);
- sunreclaim = global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B);
+ sreclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE_B);
+ sunreclaim = global_node_page_state(NR_SLAB_UNRECLAIMABLE_B);
show_val_kb(m, "MemTotal: ", i.totalram);
show_val_kb(m, "MemFree: ", i.freeram);
@@ -100,11 +100,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "Slab: ", sreclaimable + sunreclaim);
show_val_kb(m, "SReclaimable: ", sreclaimable);
show_val_kb(m, "SUnreclaim: ", sunreclaim);
- seq_printf(m, "KernelStack: %8lu kB\n",
- global_node_page_state(NR_KERNEL_STACK_B) / SZ_1K);
+ show_val_kb(m, "KernelStack: ",
+ global_node_page_state(NR_KERNEL_STACK_B));
#ifdef CONFIG_SHADOW_CALL_STACK
- seq_printf(m, "ShadowCallStack:%8lu kB\n",
- global_node_page_state(NR_KERNEL_SCS_B) / SZ_1K);
+ show_val_kb(m, "ShadowCallStack:",
+ global_node_page_state(NR_KERNEL_SCS_B));
#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index afd84dc2398c..ae821e016fdd 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -193,8 +193,7 @@ static inline unsigned long global_zone_page_state(enum zone_stat_item item)
return x;
}
-static inline
-unsigned long global_node_page_state_pages(enum node_stat_item item)
+static inline unsigned long global_node_page_state(enum node_stat_item item)
{
long x = atomic_long_read(&vm_node_stat[item]);
@@ -207,17 +206,6 @@ unsigned long global_node_page_state_pages(enum node_stat_item item)
return x;
}
-static inline unsigned long global_node_page_state(enum node_stat_item item)
-{
- long x = atomic_long_read(&vm_node_stat[item]);
-
-#ifdef CONFIG_SMP
- if (x < 0)
- x = 0;
-#endif
- return x;
-}
-
static inline unsigned long zone_page_state(struct zone *zone,
enum zone_stat_item item)
{
@@ -258,12 +246,9 @@ extern unsigned long sum_zone_node_page_state(int node,
extern unsigned long sum_zone_numa_state(int node, enum numa_stat_item item);
extern unsigned long node_page_state(struct pglist_data *pgdat,
enum node_stat_item item);
-extern unsigned long node_page_state_pages(struct pglist_data *pgdat,
- enum node_stat_item item);
#else
#define sum_zone_node_page_state(node, item) global_zone_page_state(item)
#define node_page_state(node, item) global_node_page_state(item)
-#define node_page_state_pages(node, item) global_node_page_state_pages(item)
#endif /* CONFIG_NUMA */
#ifdef CONFIG_SMP
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index d63560e1cf87..664520bdaa20 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1705,7 +1705,7 @@ static unsigned long minimum_image_size(unsigned long saveable)
{
unsigned long size;
- size = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B)
+ size = global_node_page_state(NR_SLAB_RECLAIMABLE_B)
+ global_node_page_state(NR_ACTIVE_ANON)
+ global_node_page_state(NR_INACTIVE_ANON)
+ global_node_page_state(NR_ACTIVE_FILE)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 04b19b7b5435..73861473c7d4 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -188,7 +188,7 @@ static bool should_dump_unreclaim_slab(void)
global_node_page_state(NR_ISOLATED_FILE) +
global_node_page_state(NR_UNEVICTABLE);
- return (global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B) > nr_lru);
+ return (global_node_page_state(NR_SLAB_UNRECLAIMABLE_B) > nr_lru);
}
/**
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 58916b3afdab..d16c9388c0b8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5372,7 +5372,7 @@ long si_mem_available(void)
* items that are in use, and cannot be freed. Cap this estimate at the
* low watermark.
*/
- reclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) +
+ reclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE_B) +
global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
available += reclaimable - min(reclaimable / 2, wmark_low);
@@ -5516,8 +5516,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
global_node_page_state(NR_UNEVICTABLE),
global_node_page_state(NR_FILE_DIRTY),
global_node_page_state(NR_WRITEBACK),
- global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B),
- global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B),
+ global_node_page_state(NR_SLAB_RECLAIMABLE_B),
+ global_node_page_state(NR_SLAB_UNRECLAIMABLE_B),
global_node_page_state(NR_FILE_MAPPED),
global_node_page_state(NR_SHMEM),
global_zone_page_state(NR_PAGETABLE),
@@ -5572,9 +5572,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(node_page_state(pgdat, NR_ANON_THPS)),
#endif
K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
- node_page_state(pgdat, NR_KERNEL_STACK_B) / SZ_1K,
+ K(node_page_state(pgdat, NR_KERNEL_STACK_B)),
#ifdef CONFIG_SHADOW_CALL_STACK
- node_page_state(pgdat, NR_KERNEL_SCS_B) / SZ_1K,
+ K(node_page_state(pgdat, NR_KERNEL_SCS_B)),
#endif
pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
"yes" : "no");
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 469016222cdb..5d3c8fa68979 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4220,7 +4220,7 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
* unmapped file backed pages.
*/
if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages &&
- node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B) <=
+ node_page_state(pgdat, NR_SLAB_RECLAIMABLE_B) <=
pgdat->min_slab_pages)
return NODE_RECLAIM_FULL;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 25751b1d8e2e..b7cdef585efd 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1000,22 +1000,9 @@ unsigned long sum_zone_numa_state(int node,
}
/*
- * Determine the per node value of a stat item.
+ * Determine the per node value of a stat item. This always returns
+ * values in pages.
*/
-unsigned long node_page_state_pages(struct pglist_data *pgdat,
- enum node_stat_item item)
-{
- long x = atomic_long_read(&pgdat->vm_stat[item]);
-
-#ifdef CONFIG_SMP
- if (x < 0)
- x = 0;
-#endif
- if (vmstat_item_in_bytes(item))
- x >>= PAGE_SHIFT;
- return x;
-}
-
unsigned long node_page_state(struct pglist_data *pgdat,
enum node_stat_item item)
{
@@ -1025,6 +1012,8 @@ unsigned long node_page_state(struct pglist_data *pgdat,
if (x < 0)
x = 0;
#endif
+ if (vmstat_item_in_bytes(item))
+ x >>= PAGE_SHIFT;
return x;
}
#endif
@@ -1626,7 +1615,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
seq_printf(m, "\n per-node stats");
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
seq_printf(m, "\n %-12s %lu", node_stat_name(i),
- node_page_state_pages(pgdat, i));
+ node_page_state(pgdat, i));
}
}
seq_printf(m,
@@ -1747,7 +1736,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
#endif
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
- v[i] = global_node_page_state_pages(i);
+ v[i] = global_node_page_state(i);
v += NR_VM_NODE_STAT_ITEMS;
global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
--
2.11.0