linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] memcg: add pagetable comsumption to memory.stat
@ 2020-11-26  0:56 Shakeel Butt
  2020-11-26  0:56 ` [PATCH 1/2] mm: move lruvec stats update functions to vmstat.h Shakeel Butt
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Shakeel Butt @ 2020-11-26  0:56 UTC (permalink / raw)
  To: Roman Gushchin, Johannes Weiner, Michal Hocko
  Cc: Andrew Morton, linux-mm, linux-kernel, Shakeel Butt

Many workloads consume a significant amount of memory in pagetables. This
patch series exposes the pagetable consumption for each memory cgroup.

Shakeel Butt (2):
  mm: move lruvec stats update functions to vmstat.h
  mm: memcontrol: account pagetables per node

 Documentation/admin-guide/cgroup-v2.rst |   3 +
 arch/nds32/mm/mm-nds32.c                |   6 +-
 drivers/base/node.c                     |   2 +-
 fs/proc/meminfo.c                       |   2 +-
 include/linux/memcontrol.h              | 112 ------------------------
 include/linux/mm.h                      |  11 ++-
 include/linux/mmzone.h                  |   2 +-
 include/linux/vmstat.h                  | 104 ++++++++++++++++++++++
 mm/memcontrol.c                         |  19 ++++
 mm/page_alloc.c                         |   6 +-
 10 files changed, 142 insertions(+), 125 deletions(-)

-- 
2.29.2.454.gaff20da3a2-goog


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/2] mm: move lruvec stats update functions to vmstat.h
  2020-11-26  0:56 [PATCH 0/2] memcg: add pagetable comsumption to memory.stat Shakeel Butt
@ 2020-11-26  0:56 ` Shakeel Butt
  2020-11-30 20:09   ` Johannes Weiner
  2020-11-26  0:56 ` [PATCH 2/2] mm: memcontrol: account pagetables per node Shakeel Butt
  2020-11-30 20:34 ` [PATCH 0/2] memcg: add pagetable comsumption to memory.stat Roman Gushchin
  2 siblings, 1 reply; 10+ messages in thread
From: Shakeel Butt @ 2020-11-26  0:56 UTC (permalink / raw)
  To: Roman Gushchin, Johannes Weiner, Michal Hocko
  Cc: Andrew Morton, linux-mm, linux-kernel, Shakeel Butt

This patch does not change any functionality and only moves the functions
which update the lruvec stats to vmstat.h from memcontrol.h. The main
reason for this patch is to be able to use these functions in the page
table constructor function, which is defined in mm.h, and we cannot
include memcontrol.h in that file.

Signed-off-by: Shakeel Butt <shakeelb@google.com>
---
 include/linux/memcontrol.h | 112 -------------------------------------
 include/linux/vmstat.h     | 104 ++++++++++++++++++++++++++++++++++
 mm/memcontrol.c            |  18 ++++++
 3 files changed, 122 insertions(+), 112 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 87ed56dc75f9..cd7b9136fb39 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -947,8 +947,6 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 
 void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 			      int val);
-void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
-			int val);
 void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val);
 
 static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
@@ -971,44 +969,6 @@ static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
 	local_irq_restore(flags);
 }
 
-static inline void mod_lruvec_state(struct lruvec *lruvec,
-				    enum node_stat_item idx, int val)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__mod_lruvec_state(lruvec, idx, val);
-	local_irq_restore(flags);
-}
-
-static inline void __mod_lruvec_page_state(struct page *page,
-					   enum node_stat_item idx, int val)
-{
-	struct page *head = compound_head(page); /* rmap on tail pages */
-	struct mem_cgroup *memcg = page_memcg(head);
-	pg_data_t *pgdat = page_pgdat(page);
-	struct lruvec *lruvec;
-
-	/* Untracked pages have no memcg, no lruvec. Update only the node */
-	if (!memcg) {
-		__mod_node_page_state(pgdat, idx, val);
-		return;
-	}
-
-	lruvec = mem_cgroup_lruvec(memcg, pgdat);
-	__mod_lruvec_state(lruvec, idx, val);
-}
-
-static inline void mod_lruvec_page_state(struct page *page,
-					 enum node_stat_item idx, int val)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__mod_lruvec_page_state(page, idx, val);
-	local_irq_restore(flags);
-}
-
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 						gfp_t gfp_mask,
 						unsigned long *total_scanned);
@@ -1411,30 +1371,6 @@ static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
 {
 }
 
-static inline void __mod_lruvec_state(struct lruvec *lruvec,
-				      enum node_stat_item idx, int val)
-{
-	__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
-}
-
-static inline void mod_lruvec_state(struct lruvec *lruvec,
-				    enum node_stat_item idx, int val)
-{
-	mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
-}
-
-static inline void __mod_lruvec_page_state(struct page *page,
-					   enum node_stat_item idx, int val)
-{
-	__mod_node_page_state(page_pgdat(page), idx, val);
-}
-
-static inline void mod_lruvec_page_state(struct page *page,
-					 enum node_stat_item idx, int val)
-{
-	mod_node_page_state(page_pgdat(page), idx, val);
-}
-
 static inline void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
 					   int val)
 {
@@ -1490,30 +1426,6 @@ static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
 }
 #endif /* CONFIG_MEMCG */
 
-static inline void __inc_lruvec_state(struct lruvec *lruvec,
-				      enum node_stat_item idx)
-{
-	__mod_lruvec_state(lruvec, idx, 1);
-}
-
-static inline void __dec_lruvec_state(struct lruvec *lruvec,
-				      enum node_stat_item idx)
-{
-	__mod_lruvec_state(lruvec, idx, -1);
-}
-
-static inline void __inc_lruvec_page_state(struct page *page,
-					   enum node_stat_item idx)
-{
-	__mod_lruvec_page_state(page, idx, 1);
-}
-
-static inline void __dec_lruvec_page_state(struct page *page,
-					   enum node_stat_item idx)
-{
-	__mod_lruvec_page_state(page, idx, -1);
-}
-
 static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx)
 {
 	__mod_lruvec_kmem_state(p, idx, 1);
@@ -1524,30 +1436,6 @@ static inline void __dec_lruvec_kmem_state(void *p, enum node_stat_item idx)
 	__mod_lruvec_kmem_state(p, idx, -1);
 }
 
-static inline void inc_lruvec_state(struct lruvec *lruvec,
-				    enum node_stat_item idx)
-{
-	mod_lruvec_state(lruvec, idx, 1);
-}
-
-static inline void dec_lruvec_state(struct lruvec *lruvec,
-				    enum node_stat_item idx)
-{
-	mod_lruvec_state(lruvec, idx, -1);
-}
-
-static inline void inc_lruvec_page_state(struct page *page,
-					 enum node_stat_item idx)
-{
-	mod_lruvec_page_state(page, idx, 1);
-}
-
-static inline void dec_lruvec_page_state(struct page *page,
-					 enum node_stat_item idx)
-{
-	mod_lruvec_page_state(page, idx, -1);
-}
-
 static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
 {
 	struct mem_cgroup *memcg;
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 322dcbfcc933..773135fc6e19 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -450,4 +450,108 @@ static inline const char *vm_event_name(enum vm_event_item item)
 }
 #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
 
+#ifdef CONFIG_MEMCG
+
+void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+			int val);
+
+static inline void mod_lruvec_state(struct lruvec *lruvec,
+				    enum node_stat_item idx, int val)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__mod_lruvec_state(lruvec, idx, val);
+	local_irq_restore(flags);
+}
+
+void __mod_lruvec_page_state(struct page *page,
+			     enum node_stat_item idx, int val);
+
+static inline void mod_lruvec_page_state(struct page *page,
+					 enum node_stat_item idx, int val)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__mod_lruvec_page_state(page, idx, val);
+	local_irq_restore(flags);
+}
+
+#else
+
+static inline void __mod_lruvec_state(struct lruvec *lruvec,
+				      enum node_stat_item idx, int val)
+{
+	__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+}
+
+static inline void mod_lruvec_state(struct lruvec *lruvec,
+				    enum node_stat_item idx, int val)
+{
+	mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+}
+
+static inline void __mod_lruvec_page_state(struct page *page,
+					   enum node_stat_item idx, int val)
+{
+	__mod_node_page_state(page_pgdat(page), idx, val);
+}
+
+static inline void mod_lruvec_page_state(struct page *page,
+					 enum node_stat_item idx, int val)
+{
+	mod_node_page_state(page_pgdat(page), idx, val);
+}
+
+#endif /* CONFIG_MEMCG */
+
+static inline void __inc_lruvec_state(struct lruvec *lruvec,
+				      enum node_stat_item idx)
+{
+	__mod_lruvec_state(lruvec, idx, 1);
+}
+
+static inline void __dec_lruvec_state(struct lruvec *lruvec,
+				      enum node_stat_item idx)
+{
+	__mod_lruvec_state(lruvec, idx, -1);
+}
+
+static inline void __inc_lruvec_page_state(struct page *page,
+					   enum node_stat_item idx)
+{
+	__mod_lruvec_page_state(page, idx, 1);
+}
+
+static inline void __dec_lruvec_page_state(struct page *page,
+					   enum node_stat_item idx)
+{
+	__mod_lruvec_page_state(page, idx, -1);
+}
+
+static inline void inc_lruvec_state(struct lruvec *lruvec,
+				    enum node_stat_item idx)
+{
+	mod_lruvec_state(lruvec, idx, 1);
+}
+
+static inline void dec_lruvec_state(struct lruvec *lruvec,
+				    enum node_stat_item idx)
+{
+	mod_lruvec_state(lruvec, idx, -1);
+}
+
+static inline void inc_lruvec_page_state(struct page *page,
+					 enum node_stat_item idx)
+{
+	mod_lruvec_page_state(page, idx, 1);
+}
+
+static inline void dec_lruvec_page_state(struct page *page,
+					 enum node_stat_item idx)
+{
+	mod_lruvec_page_state(page, idx, -1);
+}
+
 #endif /* _LINUX_VMSTAT_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9922f1510956..8b9352ddff0b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -847,6 +847,24 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 		__mod_memcg_lruvec_state(lruvec, idx, val);
 }
 
+void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx,
+			     int val)
+{
+	struct page *head = compound_head(page); /* rmap on tail pages */
+	struct mem_cgroup *memcg = page_memcg(head);
+	pg_data_t *pgdat = page_pgdat(page);
+	struct lruvec *lruvec;
+
+	/* Untracked pages have no memcg, no lruvec. Update only the node */
+	if (!memcg) {
+		__mod_node_page_state(pgdat, idx, val);
+		return;
+	}
+
+	lruvec = mem_cgroup_lruvec(memcg, pgdat);
+	__mod_lruvec_state(lruvec, idx, val);
+}
+
 void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val)
 {
 	pg_data_t *pgdat = page_pgdat(virt_to_page(p));
-- 
2.29.2.454.gaff20da3a2-goog


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/2] mm: memcontrol: account pagetables per node
  2020-11-26  0:56 [PATCH 0/2] memcg: add pagetable comsumption to memory.stat Shakeel Butt
  2020-11-26  0:56 ` [PATCH 1/2] mm: move lruvec stats update functions to vmstat.h Shakeel Butt
@ 2020-11-26  0:56 ` Shakeel Butt
  2020-11-30 20:19   ` Johannes Weiner
  2020-11-30 20:34 ` [PATCH 0/2] memcg: add pagetable comsumption to memory.stat Roman Gushchin
  2 siblings, 1 reply; 10+ messages in thread
From: Shakeel Butt @ 2020-11-26  0:56 UTC (permalink / raw)
  To: Roman Gushchin, Johannes Weiner, Michal Hocko
  Cc: Andrew Morton, linux-mm, linux-kernel, Shakeel Butt

For many workloads, pagetable consumption is significant and it makes
sense to expose it in the memory.stat for the memory cgroups. However at
the moment, the pagetables are accounted per-zone. Converting them to
per-node and using the right interface will correctly account for the
memory cgroups as well.

Signed-off-by: Shakeel Butt <shakeelb@google.com>
---
This patch was posted at [1] and [2] but more work was needed to make it
build for all archs.

[1] http://lkml.kernel.org/r/20201121022118.3143384-1-shakeelb@google.com
[2] http://lkml.kernel.org/r/20201123161425.341314-1-shakeelb@google.com

 Documentation/admin-guide/cgroup-v2.rst | 3 +++
 arch/nds32/mm/mm-nds32.c                | 6 +++---
 drivers/base/node.c                     | 2 +-
 fs/proc/meminfo.c                       | 2 +-
 include/linux/mm.h                      | 8 ++++----
 include/linux/mmzone.h                  | 2 +-
 mm/memcontrol.c                         | 1 +
 mm/page_alloc.c                         | 6 +++---
 8 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 515bb13084a0..63521cd36ce5 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1274,6 +1274,9 @@ PAGE_SIZE multiple when read back.
 	  kernel_stack
 		Amount of memory allocated to kernel stacks.
 
+	  pagetables
+                Amount of memory allocated for page tables.
+
 	  percpu(npn)
 		Amount of memory used for storing per-cpu kernel
 		data structures.
diff --git a/arch/nds32/mm/mm-nds32.c b/arch/nds32/mm/mm-nds32.c
index 55bec50ccc03..f2778f2b39f6 100644
--- a/arch/nds32/mm/mm-nds32.c
+++ b/arch/nds32/mm/mm-nds32.c
@@ -34,8 +34,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	cpu_dcache_wb_range((unsigned long)new_pgd,
 			    (unsigned long)new_pgd +
 			    PTRS_PER_PGD * sizeof(pgd_t));
-	inc_zone_page_state(virt_to_page((unsigned long *)new_pgd),
-			    NR_PAGETABLE);
+	inc_lruvec_page_state(virt_to_page((unsigned long *)new_pgd),
+			      NR_PAGETABLE);
 
 	return new_pgd;
 }
@@ -59,7 +59,7 @@ void pgd_free(struct mm_struct *mm, pgd_t * pgd)
 
 	pte = pmd_page(*pmd);
 	pmd_clear(pmd);
-	dec_zone_page_state(virt_to_page((unsigned long *)pgd), NR_PAGETABLE);
+	dec_lruvec_page_state(virt_to_page((unsigned long *)pgd), NR_PAGETABLE);
 	pte_free(mm, pte);
 	mm_dec_nr_ptes(mm);
 	pmd_free(mm, pmd);
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 6ffa470e2984..04f71c7bc3f8 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -450,7 +450,7 @@ static ssize_t node_read_meminfo(struct device *dev,
 #ifdef CONFIG_SHADOW_CALL_STACK
 			     nid, node_page_state(pgdat, NR_KERNEL_SCS_KB),
 #endif
-			     nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
+			     nid, K(node_page_state(pgdat, NR_PAGETABLE)),
 			     nid, 0UL,
 			     nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
 			     nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 887a5532e449..d6fc74619625 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -107,7 +107,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		   global_node_page_state(NR_KERNEL_SCS_KB));
 #endif
 	show_val_kb(m, "PageTables:     ",
-		    global_zone_page_state(NR_PAGETABLE));
+		    global_node_page_state(NR_PAGETABLE));
 
 	show_val_kb(m, "NFS_Unstable:   ", 0);
 	show_val_kb(m, "Bounce:         ",
diff --git a/include/linux/mm.h b/include/linux/mm.h
index eabe7d9f80d8..d1f64744ace2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2199,7 +2199,7 @@ static inline bool pgtable_pte_page_ctor(struct page *page)
 	if (!ptlock_init(page))
 		return false;
 	__SetPageTable(page);
-	inc_zone_page_state(page, NR_PAGETABLE);
+	inc_lruvec_page_state(page, NR_PAGETABLE);
 	return true;
 }
 
@@ -2207,7 +2207,7 @@ static inline void pgtable_pte_page_dtor(struct page *page)
 {
 	ptlock_free(page);
 	__ClearPageTable(page);
-	dec_zone_page_state(page, NR_PAGETABLE);
+	dec_lruvec_page_state(page, NR_PAGETABLE);
 }
 
 #define pte_offset_map_lock(mm, pmd, address, ptlp)	\
@@ -2294,7 +2294,7 @@ static inline bool pgtable_pmd_page_ctor(struct page *page)
 	if (!pmd_ptlock_init(page))
 		return false;
 	__SetPageTable(page);
-	inc_zone_page_state(page, NR_PAGETABLE);
+	inc_lruvec_page_state(page, NR_PAGETABLE);
 	return true;
 }
 
@@ -2302,7 +2302,7 @@ static inline void pgtable_pmd_page_dtor(struct page *page)
 {
 	pmd_ptlock_free(page);
 	__ClearPageTable(page);
-	dec_zone_page_state(page, NR_PAGETABLE);
+	dec_lruvec_page_state(page, NR_PAGETABLE);
 }
 
 /*
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 15132adaa233..b593316bff3d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -151,7 +151,6 @@ enum zone_stat_item {
 	NR_ZONE_UNEVICTABLE,
 	NR_ZONE_WRITE_PENDING,	/* Count of dirty, writeback and unstable pages */
 	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
-	NR_PAGETABLE,		/* used for pagetables */
 	/* Second 128 byte cacheline */
 	NR_BOUNCE,
 #if IS_ENABLED(CONFIG_ZSMALLOC)
@@ -206,6 +205,7 @@ enum node_stat_item {
 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
 	NR_KERNEL_SCS_KB,	/* measured in KiB */
 #endif
+	NR_PAGETABLE,		/* used for pagetables */
 	NR_VM_NODE_STAT_ITEMS
 };
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8b9352ddff0b..b80328f52fb4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1519,6 +1519,7 @@ static struct memory_stat memory_stats[] = {
 	{ "anon", PAGE_SIZE, NR_ANON_MAPPED },
 	{ "file", PAGE_SIZE, NR_FILE_PAGES },
 	{ "kernel_stack", 1024, NR_KERNEL_STACK_KB },
+	{ "pagetables", PAGE_SIZE, NR_PAGETABLE },
 	{ "percpu", 1, MEMCG_PERCPU_B },
 	{ "sock", PAGE_SIZE, MEMCG_SOCK },
 	{ "shmem", PAGE_SIZE, NR_SHMEM },
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f91df593bf71..df749013f539 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5520,7 +5520,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 		global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B),
 		global_node_page_state(NR_FILE_MAPPED),
 		global_node_page_state(NR_SHMEM),
-		global_zone_page_state(NR_PAGETABLE),
+		global_node_page_state(NR_PAGETABLE),
 		global_zone_page_state(NR_BOUNCE),
 		global_zone_page_state(NR_FREE_PAGES),
 		free_pcp,
@@ -5552,6 +5552,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 #ifdef CONFIG_SHADOW_CALL_STACK
 			" shadow_call_stack:%lukB"
 #endif
+			" pagetables:%lukB"
 			" all_unreclaimable? %s"
 			"\n",
 			pgdat->node_id,
@@ -5577,6 +5578,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 #ifdef CONFIG_SHADOW_CALL_STACK
 			node_page_state(pgdat, NR_KERNEL_SCS_KB),
 #endif
+			K(node_page_state(pgdat, NR_PAGETABLE)),
 			pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
 				"yes" : "no");
 	}
@@ -5608,7 +5610,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 			" present:%lukB"
 			" managed:%lukB"
 			" mlocked:%lukB"
-			" pagetables:%lukB"
 			" bounce:%lukB"
 			" free_pcp:%lukB"
 			" local_pcp:%ukB"
@@ -5629,7 +5630,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 			K(zone->present_pages),
 			K(zone_managed_pages(zone)),
 			K(zone_page_state(zone, NR_MLOCK)),
-			K(zone_page_state(zone, NR_PAGETABLE)),
 			K(zone_page_state(zone, NR_BOUNCE)),
 			K(free_pcp),
 			K(this_cpu_read(zone->pageset->pcp.count)),
-- 
2.29.2.454.gaff20da3a2-goog


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] mm: move lruvec stats update functions to vmstat.h
  2020-11-26  0:56 ` [PATCH 1/2] mm: move lruvec stats update functions to vmstat.h Shakeel Butt
@ 2020-11-30 20:09   ` Johannes Weiner
  0 siblings, 0 replies; 10+ messages in thread
From: Johannes Weiner @ 2020-11-30 20:09 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Roman Gushchin, Michal Hocko, Andrew Morton, linux-mm, linux-kernel

On Wed, Nov 25, 2020 at 04:56:02PM -0800, Shakeel Butt wrote:
> This patch does not change any functionality and only move the functions
> which update the lruvec stats to vmstat.h from memcontrol.h. The main
> reason for this patch is to be able to use these functions in the page
> table contructor function which is defined in mm.h and we can not
> include the memcontrol.h in that file.
> 
> Signed-off-by: Shakeel Butt <shakeelb@google.com>

This is a better place for this interface in general. The lruvec
abstraction, while invented for memcg, isn't specific to memcg at all.

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] mm: memcontrol: account pagetables per node
  2020-11-26  0:56 ` [PATCH 2/2] mm: memcontrol: account pagetables per node Shakeel Butt
@ 2020-11-30 20:19   ` Johannes Weiner
  2020-11-30 20:54     ` Shakeel Butt
  0 siblings, 1 reply; 10+ messages in thread
From: Johannes Weiner @ 2020-11-30 20:19 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Roman Gushchin, Michal Hocko, Andrew Morton, linux-mm, linux-kernel

On Wed, Nov 25, 2020 at 04:56:03PM -0800, Shakeel Butt wrote:
> For many workloads, pagetable consumption is significant and it makes
> sense to expose it in the memory.stat for the memory cgroups. However at
> the moment, the pagetables are accounted per-zone. Converting them to
> per-node and using the right interface will correctly account for the
> memory cgroups as well.
> 
> Signed-off-by: Shakeel Butt <shakeelb@google.com>

Agreed, this is a useful stat item to have.

Just one trivial issue:

> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -151,7 +151,6 @@ enum zone_stat_item {
>  	NR_ZONE_UNEVICTABLE,
>  	NR_ZONE_WRITE_PENDING,	/* Count of dirty, writeback and unstable pages */
>  	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
> -	NR_PAGETABLE,		/* used for pagetables */
>  	/* Second 128 byte cacheline */
>  	NR_BOUNCE,
>  #if IS_ENABLED(CONFIG_ZSMALLOC)
> @@ -206,6 +205,7 @@ enum node_stat_item {
>  #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
>  	NR_KERNEL_SCS_KB,	/* measured in KiB */
>  #endif
> +	NR_PAGETABLE,		/* used for pagetables */
>  	NR_VM_NODE_STAT_ITEMS
>  };

You need to update mm/vmstat.c::vmstat_text accordingly or
/proc/vmstat output will be bogus.

With that fixed, please feel free to add:
Acked-by: Johannes Weiner <hannes@cmpxchg.org>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 0/2] memcg: add pagetable comsumption to memory.stat
  2020-11-26  0:56 [PATCH 0/2] memcg: add pagetable comsumption to memory.stat Shakeel Butt
  2020-11-26  0:56 ` [PATCH 1/2] mm: move lruvec stats update functions to vmstat.h Shakeel Butt
  2020-11-26  0:56 ` [PATCH 2/2] mm: memcontrol: account pagetables per node Shakeel Butt
@ 2020-11-30 20:34 ` Roman Gushchin
  2020-11-30 21:01   ` Shakeel Butt
  2 siblings, 1 reply; 10+ messages in thread
From: Roman Gushchin @ 2020-11-30 20:34 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Johannes Weiner, Michal Hocko, Andrew Morton, linux-mm, linux-kernel

On Wed, Nov 25, 2020 at 04:56:01PM -0800, Shakeel Butt wrote:
> Many workloads consumes significant amount of memory in pagetables. This
> patch series exposes the pagetable comsumption for each memory cgroup.

Hi Shakeel!

The code looks good to me. However I'm not sure I understand what's the
use case for the new statistics? Can you, please, elaborate a bit more here?

From a very first glance, the size of pagetables should be _roughly_ equal
to the size_of(pte)/PAGE_SIZE*(size of a cgroup) and should not exceed 1%
of the cgroup size. So for all but very large cgroups the value will be
in the noise of per-cpu counters. Perhaps I'm missing some important cases?

Thanks!

> 
> Shakeel Butt (2):
>   mm: move lruvec stats update functions to vmstat.h
>   mm: memcontrol: account pagetables per node
> 
>  Documentation/admin-guide/cgroup-v2.rst |   3 +
>  arch/nds32/mm/mm-nds32.c                |   6 +-
>  drivers/base/node.c                     |   2 +-
>  fs/proc/meminfo.c                       |   2 +-
>  include/linux/memcontrol.h              | 112 ------------------------
>  include/linux/mm.h                      |  11 ++-
>  include/linux/mmzone.h                  |   2 +-
>  include/linux/vmstat.h                  | 104 ++++++++++++++++++++++
>  mm/memcontrol.c                         |  19 ++++
>  mm/page_alloc.c                         |   6 +-
>  10 files changed, 142 insertions(+), 125 deletions(-)
> 
> -- 
> 2.29.2.454.gaff20da3a2-goog
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] mm: memcontrol: account pagetables per node
  2020-11-30 20:19   ` Johannes Weiner
@ 2020-11-30 20:54     ` Shakeel Butt
  0 siblings, 0 replies; 10+ messages in thread
From: Shakeel Butt @ 2020-11-30 20:54 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Roman Gushchin, Michal Hocko, Andrew Morton, Linux MM, LKML

On Mon, Nov 30, 2020 at 12:21 PM Johannes Weiner <hannes@cmpxchg.org> wrote:
>
> On Wed, Nov 25, 2020 at 04:56:03PM -0800, Shakeel Butt wrote:
> > For many workloads, pagetable consumption is significant and it makes
> > sense to expose it in the memory.stat for the memory cgroups. However at
> > the moment, the pagetables are accounted per-zone. Converting them to
> > per-node and using the right interface will correctly account for the
> > memory cgroups as well.
> >
> > Signed-off-by: Shakeel Butt <shakeelb@google.com>
>
> Agreed, this is a useful stat item to have.
>
> Just one trivial issue:
>
> > --- a/include/linux/mmzone.h
> > +++ b/include/linux/mmzone.h
> > @@ -151,7 +151,6 @@ enum zone_stat_item {
> >       NR_ZONE_UNEVICTABLE,
> >       NR_ZONE_WRITE_PENDING,  /* Count of dirty, writeback and unstable pages */
> >       NR_MLOCK,               /* mlock()ed pages found and moved off LRU */
> > -     NR_PAGETABLE,           /* used for pagetables */
> >       /* Second 128 byte cacheline */
> >       NR_BOUNCE,
> >  #if IS_ENABLED(CONFIG_ZSMALLOC)
> > @@ -206,6 +205,7 @@ enum node_stat_item {
> >  #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
> >       NR_KERNEL_SCS_KB,       /* measured in KiB */
> >  #endif
> > +     NR_PAGETABLE,           /* used for pagetables */
> >       NR_VM_NODE_STAT_ITEMS
> >  };
>
> You need to update mm/vmstat.c::vmstat_text accordingly or
> /proc/vmstat output will be bogus.

Oh I missed that. Thanks for catching.

>
> With that fixed, please feel free to add:
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>

Thanks.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 0/2] memcg: add pagetable comsumption to memory.stat
  2020-11-30 20:34 ` [PATCH 0/2] memcg: add pagetable comsumption to memory.stat Roman Gushchin
@ 2020-11-30 21:01   ` Shakeel Butt
  2020-11-30 21:10     ` Roman Gushchin
  0 siblings, 1 reply; 10+ messages in thread
From: Shakeel Butt @ 2020-11-30 21:01 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: Johannes Weiner, Michal Hocko, Andrew Morton, Linux MM, LKML

On Mon, Nov 30, 2020 at 12:34 PM Roman Gushchin <guro@fb.com> wrote:
>
> On Wed, Nov 25, 2020 at 04:56:01PM -0800, Shakeel Butt wrote:
> > Many workloads consumes significant amount of memory in pagetables. This
> > patch series exposes the pagetable comsumption for each memory cgroup.
>
> Hi Shakeel!
>
> The code looks good to me. However I'm not sure I understand what's the
> use case for the new statistics? Can you, please, elaborate a bit more here?
>
> From a very first glance, the size of pagetables should be _roughly_ equal
> to the size_of(pte)/PAGE_SIZE*(size of a cgroup) and should not exceed 1%
> of the cgroup size. So for all but very large cgroups the value will be
> in the noise of per-cpu counters. Perhaps I'm missing some important cases?
>

I think this is in general a good metric to have but one specific
use-case we have is the user space network driver which mmaps the
memory of the applications for zero copy data transfers. This driver
can consume a large amount of memory in page tables. So, this metric
becomes really useful here.

Shakeel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 0/2] memcg: add pagetable comsumption to memory.stat
  2020-11-30 21:01   ` Shakeel Butt
@ 2020-11-30 21:10     ` Roman Gushchin
  2020-11-30 21:13       ` Shakeel Butt
  0 siblings, 1 reply; 10+ messages in thread
From: Roman Gushchin @ 2020-11-30 21:10 UTC (permalink / raw)
  To: Shakeel Butt; +Cc: Johannes Weiner, Michal Hocko, Andrew Morton, Linux MM, LKML

On Mon, Nov 30, 2020 at 01:01:18PM -0800, Shakeel Butt wrote:
> On Mon, Nov 30, 2020 at 12:34 PM Roman Gushchin <guro@fb.com> wrote:
> >
> > On Wed, Nov 25, 2020 at 04:56:01PM -0800, Shakeel Butt wrote:
> > > Many workloads consumes significant amount of memory in pagetables. This
> > > patch series exposes the pagetable comsumption for each memory cgroup.
> >
> > Hi Shakeel!
> >
> > The code looks good to me. However I'm not sure I understand what's the
> > use case for the new statistics? Can you, please, elaborate a bit more here?
> >
> > From a very first glance, the size of pagetables should be _roughly_ equal
> > to the size_of(pte)/PAGE_SIZE*(size of a cgroup) and should not exceed 1%
> > of the cgroup size. So for all but very large cgroups the value will be
> > in the noise of per-cpu counters. Perhaps I'm missing some important cases?
> >
> 
> I think this is in general a good metric to have but one specific
> use-case we have is the user space network driver which mmaps the
> memory of the applications for zero copy data transfers. This driver
> can consume a large amount of memory in page tables. So, this metric
> becomes really useful here.

Got it, thank you for the explanation!
Would you mind to add this text to the cover letter as an example?

Acked-by: Roman Gushchin <guro@fb.com>
for the series.

Thanks!

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 0/2] memcg: add pagetable comsumption to memory.stat
  2020-11-30 21:10     ` Roman Gushchin
@ 2020-11-30 21:13       ` Shakeel Butt
  0 siblings, 0 replies; 10+ messages in thread
From: Shakeel Butt @ 2020-11-30 21:13 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: Johannes Weiner, Michal Hocko, Andrew Morton, Linux MM, LKML

On Mon, Nov 30, 2020 at 1:10 PM Roman Gushchin <guro@fb.com> wrote:
>
> On Mon, Nov 30, 2020 at 01:01:18PM -0800, Shakeel Butt wrote:
> > On Mon, Nov 30, 2020 at 12:34 PM Roman Gushchin <guro@fb.com> wrote:
> > >
> > > On Wed, Nov 25, 2020 at 04:56:01PM -0800, Shakeel Butt wrote:
> > > > Many workloads consumes significant amount of memory in pagetables. This
> > > > patch series exposes the pagetable comsumption for each memory cgroup.
> > >
> > > Hi Shakeel!
> > >
> > > The code looks good to me. However I'm not sure I understand what's the
> > > use case for the new statistics? Can you, please, elaborate a bit more here?
> > >
> > > From a very first glance, the size of pagetables should be _roughly_ equal
> > > to the size_of(pte)/PAGE_SIZE*(size of a cgroup) and should not exceed 1%
> > > of the cgroup size. So for all but very large cgroups the value will be
> > > in the noise of per-cpu counters. Perhaps I'm missing some important cases?
> > >
> >
> > I think this is in general a good metric to have but one specific
> > use-case we have is the user space network driver which mmaps the
> > memory of the applications for zero copy data transfers. This driver
> > can consume a large amount of memory in page tables. So, this metric
> > becomes really useful here.
>
> Got it, thank you for the explanation!
> Would you mind to add this text to the cover letter as an example?

Sure.

>
> Acked-by: Roman Gushchin <guro@fb.com>
> for the series.

Thanks for the review.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-11-30 21:13 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-26  0:56 [PATCH 0/2] memcg: add pagetable comsumption to memory.stat Shakeel Butt
2020-11-26  0:56 ` [PATCH 1/2] mm: move lruvec stats update functions to vmstat.h Shakeel Butt
2020-11-30 20:09   ` Johannes Weiner
2020-11-26  0:56 ` [PATCH 2/2] mm: memcontrol: account pagetables per node Shakeel Butt
2020-11-30 20:19   ` Johannes Weiner
2020-11-30 20:54     ` Shakeel Butt
2020-11-30 20:34 ` [PATCH 0/2] memcg: add pagetable comsumption to memory.stat Roman Gushchin
2020-11-30 21:01   ` Shakeel Butt
2020-11-30 21:10     ` Roman Gushchin
2020-11-30 21:13       ` Shakeel Butt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).