linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes
@ 2017-10-05 10:14 Kirill A. Shutemov
  2017-10-05 10:14 ` [PATCHv2 2/2] mm: Consolidate page table accounting Kirill A. Shutemov
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Kirill A. Shutemov @ 2017-10-05 10:14 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-mm, linux-kernel, linux-api, Michal Hocko, Kirill A. Shutemov

Let's add wrappers for ->nr_ptes with the same interface as for nr_pmds
and nr_puds.

It's preparation for consolidation of page-table counters in mm_struct.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/arm/mm/pgd.c           |  2 +-
 arch/sparc/mm/hugetlbpage.c |  2 +-
 arch/unicore32/mm/pgd.c     |  2 +-
 fs/proc/task_mmu.c          |  2 +-
 include/linux/mm.h          | 32 ++++++++++++++++++++++++++++++++
 include/linux/mm_types.h    |  2 ++
 kernel/fork.c               |  6 +++---
 mm/debug.c                  |  2 +-
 mm/huge_memory.c            | 10 +++++-----
 mm/khugepaged.c             |  2 +-
 mm/memory.c                 |  8 ++++----
 mm/oom_kill.c               |  5 ++---
 12 files changed, 54 insertions(+), 21 deletions(-)

diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index c1c1a5c67da1..61e281cb29fb 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -141,7 +141,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd_base)
 	pte = pmd_pgtable(*pmd);
 	pmd_clear(pmd);
 	pte_free(mm, pte);
-	atomic_long_dec(&mm->nr_ptes);
+	mm_dec_nr_ptes(mm);
 no_pmd:
 	pud_clear(pud);
 	pmd_free(mm, pmd);
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index fd0d85808828..29fa5967b7d2 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -396,7 +396,7 @@ static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 
 	pmd_clear(pmd);
 	pte_free_tlb(tlb, token, addr);
-	atomic_long_dec(&tlb->mm->nr_ptes);
+	mm_dec_nr_ptes(tlb->mm);
 }
 
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
diff --git a/arch/unicore32/mm/pgd.c b/arch/unicore32/mm/pgd.c
index c572a28c76c9..a830a300aaa1 100644
--- a/arch/unicore32/mm/pgd.c
+++ b/arch/unicore32/mm/pgd.c
@@ -97,7 +97,7 @@ void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd)
 	pte = pmd_pgtable(*pmd);
 	pmd_clear(pmd);
 	pte_free(mm, pte);
-	atomic_long_dec(&mm->nr_ptes);
+	mm_dec_nr_ptes(mm);
 	pmd_free(mm, pmd);
 	mm_dec_nr_pmds(mm);
 free:
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 627de66204bd..84c262d5197a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -49,7 +49,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
 	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
 	swap = get_mm_counter(mm, MM_SWAPENTS);
-	ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes);
+	ptes = PTRS_PER_PTE * sizeof(pte_t) * mm_nr_ptes(mm);
 	pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
 	puds = PTRS_PER_PUD * sizeof(pud_t) * mm_nr_puds(mm);
 	seq_printf(m,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5125c51c9c35..e185dcdc5183 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1679,6 +1679,38 @@ static inline void mm_dec_nr_pmds(struct mm_struct *mm)
 }
 #endif
 
+#ifdef CONFIG_MMU
+static inline void mm_nr_ptes_init(struct mm_struct *mm)
+{
+	atomic_long_set(&mm->nr_ptes, 0);
+}
+
+static inline unsigned long mm_nr_ptes(const struct mm_struct *mm)
+{
+	return atomic_long_read(&mm->nr_ptes);
+}
+
+static inline void mm_inc_nr_ptes(struct mm_struct *mm)
+{
+	atomic_long_inc(&mm->nr_ptes);
+}
+
+static inline void mm_dec_nr_ptes(struct mm_struct *mm)
+{
+	atomic_long_dec(&mm->nr_ptes);
+}
+#else
+static inline void mm_nr_ptes_init(struct mm_struct *mm) {}
+
+static inline unsigned long mm_nr_ptes(const struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
+static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
+#endif
+
 int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
 int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6c8c2bb9e5a1..95d0eefe1f4a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -398,7 +398,9 @@ struct mm_struct {
 	 */
 	atomic_t mm_count;
 
+#ifdef CONFIG_MMU
 	atomic_long_t nr_ptes;			/* PTE page table pages */
+#endif
 #if CONFIG_PGTABLE_LEVELS > 2
 	atomic_long_t nr_pmds;			/* PMD page table pages */
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 5624918154db..d466181902cf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -813,7 +813,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	init_rwsem(&mm->mmap_sem);
 	INIT_LIST_HEAD(&mm->mmlist);
 	mm->core_state = NULL;
-	atomic_long_set(&mm->nr_ptes, 0);
+	mm_nr_ptes_init(mm);
 	mm_nr_pmds_init(mm);
 	mm_nr_puds_init(mm);
 	mm->map_count = 0;
@@ -869,9 +869,9 @@ static void check_mm(struct mm_struct *mm)
 					  "mm:%p idx:%d val:%ld\n", mm, i, x);
 	}
 
-	if (atomic_long_read(&mm->nr_ptes))
+	if (mm_nr_ptes(mm))
 		pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n",
-				atomic_long_read(&mm->nr_ptes));
+				mm_nr_ptes(mm));
 	if (mm_nr_pmds(mm))
 		pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n",
 				mm_nr_pmds(mm));
diff --git a/mm/debug.c b/mm/debug.c
index afccb2565269..177326818d24 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -135,7 +135,7 @@ void dump_mm(const struct mm_struct *mm)
 		mm->mmap_base, mm->mmap_legacy_base, mm->highest_vm_end,
 		mm->pgd, atomic_read(&mm->mm_users),
 		atomic_read(&mm->mm_count),
-		atomic_long_read((atomic_long_t *)&mm->nr_ptes),
+		mm_nr_ptes(mm),
 		mm_nr_pmds(mm),
 		mm_nr_puds(mm),
 		mm->map_count,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 269b5df58543..c037d3d34950 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -606,7 +606,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
 		set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
 		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
-		atomic_long_inc(&vma->vm_mm->nr_ptes);
+		mm_inc_nr_ptes(vma->vm_mm);
 		spin_unlock(vmf->ptl);
 		count_vm_event(THP_FAULT_ALLOC);
 	}
@@ -662,7 +662,7 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 	if (pgtable)
 		pgtable_trans_huge_deposit(mm, pmd, pgtable);
 	set_pmd_at(mm, haddr, pmd, entry);
-	atomic_long_inc(&mm->nr_ptes);
+	mm_inc_nr_ptes(mm);
 	return true;
 }
 
@@ -747,7 +747,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 
 	if (pgtable) {
 		pgtable_trans_huge_deposit(mm, pmd, pgtable);
-		atomic_long_inc(&mm->nr_ptes);
+		mm_inc_nr_ptes(mm);
 	}
 
 	set_pmd_at(mm, addr, pmd, entry);
@@ -975,7 +975,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	get_page(src_page);
 	page_dup_rmap(src_page, true);
 	add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
-	atomic_long_inc(&dst_mm->nr_ptes);
+	mm_inc_nr_ptes(dst_mm);
 	pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
 
 	pmdp_set_wrprotect(src_mm, addr, src_pmd);
@@ -1675,7 +1675,7 @@ static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
 
 	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
 	pte_free(mm, pgtable);
-	atomic_long_dec(&mm->nr_ptes);
+	mm_dec_nr_ptes(mm);
 }
 
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index c01f177a1120..9e36fe8857d9 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1269,7 +1269,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 			_pmd = pmdp_collapse_flush(vma, addr, pmd);
 			spin_unlock(ptl);
 			up_write(&vma->vm_mm->mmap_sem);
-			atomic_long_dec(&vma->vm_mm->nr_ptes);
+			mm_dec_nr_ptes(vma->vm_mm);
 			pte_free(vma->vm_mm, pmd_pgtable(_pmd));
 		}
 	}
diff --git a/mm/memory.c b/mm/memory.c
index 291d4984b417..c443456dbd02 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -438,7 +438,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 	pgtable_t token = pmd_pgtable(*pmd);
 	pmd_clear(pmd);
 	pte_free_tlb(tlb, token, addr);
-	atomic_long_dec(&tlb->mm->nr_ptes);
+	mm_dec_nr_ptes(tlb->mm);
 }
 
 static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
@@ -666,7 +666,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 
 	ptl = pmd_lock(mm, pmd);
 	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
-		atomic_long_inc(&mm->nr_ptes);
+		mm_inc_nr_ptes(mm);
 		pmd_populate(mm, pmd, new);
 		new = NULL;
 	}
@@ -3213,7 +3213,7 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
 			goto map_pte;
 		}
 
-		atomic_long_inc(&vma->vm_mm->nr_ptes);
+		mm_inc_nr_ptes(vma->vm_mm);
 		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
 		spin_unlock(vmf->ptl);
 		vmf->prealloc_pte = NULL;
@@ -3272,7 +3272,7 @@ static void deposit_prealloc_pte(struct vm_fault *vmf)
 	 * We are going to consume the prealloc table,
 	 * count that as nr_ptes.
 	 */
-	atomic_long_inc(&vma->vm_mm->nr_ptes);
+	mm_inc_nr_ptes(vma->vm_mm);
 	vmf->prealloc_pte = NULL;
 }
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 4bee6968885d..851a0eec2624 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -200,8 +200,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 	 * task's rss, pagetable and swap space use.
 	 */
 	points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
-		atomic_long_read(&p->mm->nr_ptes) + mm_nr_pmds(p->mm) +
-		mm_nr_puds(p->mm);
+		mm_nr_ptes(p->mm) + mm_nr_pmds(p->mm) + mm_nr_puds(p->mm);
 	task_unlock(p);
 
 	/*
@@ -396,7 +395,7 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
 		pr_info("[%5d] %5d %5d %8lu %8lu %7ld %7ld %7ld %8lu         %5hd %s\n",
 			task->pid, from_kuid(&init_user_ns, task_uid(task)),
 			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
-			atomic_long_read(&task->mm->nr_ptes),
+			mm_nr_ptes(task->mm),
 			mm_nr_pmds(task->mm),
 			mm_nr_puds(task->mm),
 			get_mm_counter(task->mm, MM_SWAPENTS),
-- 
2.14.2

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCHv2 2/2] mm: Consolidate page table accounting
  2017-10-05 10:14 [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes Kirill A. Shutemov
@ 2017-10-05 10:14 ` Kirill A. Shutemov
  2017-10-06  8:50   ` Michal Hocko
  2017-10-06  4:02 ` [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes Anshuman Khandual
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 9+ messages in thread
From: Kirill A. Shutemov @ 2017-10-05 10:14 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-mm, linux-kernel, linux-api, Michal Hocko, Kirill A. Shutemov

Currently, we account page tables separately for each page table level,
but that's redundant -- we only make use of total memory allocated to
page tables for oom_badness calculation. We also provide the information
to userspace, but it has dubious value there too.

This patch switches page table accounting to single counter.

mm->pgtables_bytes is now used to account all page table levels. We use
bytes, because page table size for different levels of page table tree
may be different.

The change has user-visible effect: we don't have VmPMD and VmPUD
reported in /proc/[pid]/status. Not sure if anybody uses them.
(As alternative, we can always report 0 kB for them.)

OOM-killer report is also slightly changed: we now report pgtables_bytes
instead of nr_ptes, nr_pmds and nr_puds.

The benefit is that we now calculate oom_badness() more correctly for
machines which have different size of page tables depending on level
or where page tables are less than a page in size.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 Documentation/filesystems/proc.txt |  1 -
 Documentation/sysctl/vm.txt        |  8 +++---
 fs/proc/task_mmu.c                 | 11 ++------
 include/linux/mm.h                 | 58 ++++++++------------------------------
 include/linux/mm_types.h           |  8 +-----
 kernel/fork.c                      | 16 +++--------
 mm/debug.c                         |  7 ++---
 mm/oom_kill.c                      | 14 ++++-----
 8 files changed, 31 insertions(+), 92 deletions(-)

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index adba21b5ada7..ec571b9bb18a 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -250,7 +250,6 @@ Table 1-2: Contents of the status files (as of 4.8)
  VmExe                       size of text segment
  VmLib                       size of shared library code
  VmPTE                       size of page table entries
- VmPMD                       size of second level page tables
  VmSwap                      amount of swap used by anonymous private data
                              (shmem swap usage is not included)
  HugetlbPages                size of hugetlb memory portions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 2717b6f2d706..2db0596d12f4 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -622,10 +622,10 @@ oom_dump_tasks
 
 Enables a system-wide task dump (excluding kernel threads) to be produced
 when the kernel performs an OOM-killing and includes such information as
-pid, uid, tgid, vm size, rss, nr_ptes, nr_pmds, nr_puds, swapents,
-oom_score_adj score, and name.  This is helpful to determine why the OOM
-killer was invoked, to identify the rogue task that caused it, and to
-determine why the OOM killer chose the task it did to kill.
+pid, uid, tgid, vm size, rss, pgtables_bytes, swapents, oom_score_adj
+score, and name.  This is helpful to determine why the OOM killer was
+invoked, to identify the rogue task that caused it, and to determine why
+the OOM killer chose the task it did to kill.
 
 If this is set to zero, this information is suppressed.  On very
 large systems with thousands of tasks it may not be feasible to dump
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 84c262d5197a..c9c81373225d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -25,7 +25,7 @@
 
 void task_mem(struct seq_file *m, struct mm_struct *mm)
 {
-	unsigned long text, lib, swap, ptes, pmds, puds, anon, file, shmem;
+	unsigned long text, lib, swap, anon, file, shmem;
 	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
 
 	anon = get_mm_counter(mm, MM_ANONPAGES);
@@ -49,9 +49,6 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
 	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
 	swap = get_mm_counter(mm, MM_SWAPENTS);
-	ptes = PTRS_PER_PTE * sizeof(pte_t) * mm_nr_ptes(mm);
-	pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
-	puds = PTRS_PER_PUD * sizeof(pud_t) * mm_nr_puds(mm);
 	seq_printf(m,
 		"VmPeak:\t%8lu kB\n"
 		"VmSize:\t%8lu kB\n"
@@ -67,8 +64,6 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 		"VmExe:\t%8lu kB\n"
 		"VmLib:\t%8lu kB\n"
 		"VmPTE:\t%8lu kB\n"
-		"VmPMD:\t%8lu kB\n"
-		"VmPUD:\t%8lu kB\n"
 		"VmSwap:\t%8lu kB\n",
 		hiwater_vm << (PAGE_SHIFT-10),
 		total_vm << (PAGE_SHIFT-10),
@@ -81,9 +76,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 		shmem << (PAGE_SHIFT-10),
 		mm->data_vm << (PAGE_SHIFT-10),
 		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
-		ptes >> 10,
-		pmds >> 10,
-		puds >> 10,
+		mm_pgtables_bytes(mm) >> 10,
 		swap << (PAGE_SHIFT-10));
 	hugetlb_report_usage(m, mm);
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e185dcdc5183..a7e50c464021 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1604,37 +1604,20 @@ static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d,
 {
 	return 0;
 }
-
-static inline unsigned long mm_nr_puds(const struct mm_struct *mm)
-{
-	return 0;
-}
-
-static inline void mm_nr_puds_init(struct mm_struct *mm) {}
 static inline void mm_inc_nr_puds(struct mm_struct *mm) {}
 static inline void mm_dec_nr_puds(struct mm_struct *mm) {}
 
 #else
 int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address);
 
-static inline void mm_nr_puds_init(struct mm_struct *mm)
-{
-	atomic_long_set(&mm->nr_puds, 0);
-}
-
-static inline unsigned long mm_nr_puds(const struct mm_struct *mm)
-{
-	return atomic_long_read(&mm->nr_puds);
-}
-
 static inline void mm_inc_nr_puds(struct mm_struct *mm)
 {
-	atomic_long_inc(&mm->nr_puds);
+	atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_puds(struct mm_struct *mm)
 {
-	atomic_long_dec(&mm->nr_puds);
+	atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 #endif
 
@@ -1645,64 +1628,47 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
 	return 0;
 }
 
-static inline void mm_nr_pmds_init(struct mm_struct *mm) {}
-
-static inline unsigned long mm_nr_pmds(const struct mm_struct *mm)
-{
-	return 0;
-}
-
 static inline void mm_inc_nr_pmds(struct mm_struct *mm) {}
 static inline void mm_dec_nr_pmds(struct mm_struct *mm) {}
 
 #else
 int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
 
-static inline void mm_nr_pmds_init(struct mm_struct *mm)
-{
-	atomic_long_set(&mm->nr_pmds, 0);
-}
-
-static inline unsigned long mm_nr_pmds(const struct mm_struct *mm)
-{
-	return atomic_long_read(&mm->nr_pmds);
-}
-
 static inline void mm_inc_nr_pmds(struct mm_struct *mm)
 {
-	atomic_long_inc(&mm->nr_pmds);
+	atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_pmds(struct mm_struct *mm)
 {
-	atomic_long_dec(&mm->nr_pmds);
+	atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 #endif
 
 #ifdef CONFIG_MMU
-static inline void mm_nr_ptes_init(struct mm_struct *mm)
+static inline void mm_pgtables_bytes_init(struct mm_struct *mm)
 {
-	atomic_long_set(&mm->nr_ptes, 0);
+	atomic_long_set(&mm->pgtables_bytes, 0);
 }
 
-static inline unsigned long mm_nr_ptes(const struct mm_struct *mm)
+static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm)
 {
-	return atomic_long_read(&mm->nr_ptes);
+	return atomic_long_read(&mm->pgtables_bytes);
 }
 
 static inline void mm_inc_nr_ptes(struct mm_struct *mm)
 {
-	atomic_long_inc(&mm->nr_ptes);
+	atomic_long_add(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_ptes(struct mm_struct *mm)
 {
-	atomic_long_dec(&mm->nr_ptes);
+	atomic_long_sub(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes);
 }
 #else
-static inline void mm_nr_ptes_init(struct mm_struct *mm) {}
 
-static inline unsigned long mm_nr_ptes(const struct mm_struct *mm)
+static inline void mm_pgtables_bytes_init(struct mm_struct *mm) {}
+static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm)
 {
 	return 0;
 }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 95d0eefe1f4a..aadd23377fbb 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -399,13 +399,7 @@ struct mm_struct {
 	atomic_t mm_count;
 
 #ifdef CONFIG_MMU
-	atomic_long_t nr_ptes;			/* PTE page table pages */
-#endif
-#if CONFIG_PGTABLE_LEVELS > 2
-	atomic_long_t nr_pmds;			/* PMD page table pages */
-#endif
-#if CONFIG_PGTABLE_LEVELS > 3
-	atomic_long_t nr_puds;			/* PUD page table pages */
+	atomic_long_t pgtables_bytes;		/* size of all page tables */
 #endif
 	int map_count;				/* number of VMAs */
 
diff --git a/kernel/fork.c b/kernel/fork.c
index d466181902cf..ad849ccdad9e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -813,9 +813,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	init_rwsem(&mm->mmap_sem);
 	INIT_LIST_HEAD(&mm->mmlist);
 	mm->core_state = NULL;
-	mm_nr_ptes_init(mm);
-	mm_nr_pmds_init(mm);
-	mm_nr_puds_init(mm);
+	mm_pgtables_bytes_init(mm);
 	mm->map_count = 0;
 	mm->locked_vm = 0;
 	mm->pinned_vm = 0;
@@ -869,15 +867,9 @@ static void check_mm(struct mm_struct *mm)
 					  "mm:%p idx:%d val:%ld\n", mm, i, x);
 	}
 
-	if (mm_nr_ptes(mm))
-		pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n",
-				mm_nr_ptes(mm));
-	if (mm_nr_pmds(mm))
-		pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n",
-				mm_nr_pmds(mm));
-	if (mm_nr_puds(mm))
-		pr_alert("BUG: non-zero nr_puds on freeing mm: %ld\n",
-				mm_nr_puds(mm));
+	if (mm_pgtables_bytes(mm))
+		pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n",
+				mm_pgtables_bytes(mm));
 
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
diff --git a/mm/debug.c b/mm/debug.c
index 177326818d24..299248a7fe0d 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -104,8 +104,7 @@ void dump_mm(const struct mm_struct *mm)
 		"get_unmapped_area %p\n"
 #endif
 		"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
-		"pgd %p mm_users %d mm_count %d\n"
-		"nr_ptes %lu nr_pmds %lu nr_puds %lu map_count %d\n"
+		"pgd %p mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
 		"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
 		"pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
 		"start_code %lx end_code %lx start_data %lx end_data %lx\n"
@@ -135,9 +134,7 @@ void dump_mm(const struct mm_struct *mm)
 		mm->mmap_base, mm->mmap_legacy_base, mm->highest_vm_end,
 		mm->pgd, atomic_read(&mm->mm_users),
 		atomic_read(&mm->mm_count),
-		mm_nr_ptes(mm),
-		mm_nr_pmds(mm),
-		mm_nr_puds(mm),
+		mm_pgtables_bytes(mm),
 		mm->map_count,
 		mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
 		mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 851a0eec2624..a48280e64be6 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -200,7 +200,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 	 * task's rss, pagetable and swap space use.
 	 */
 	points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
-		mm_nr_ptes(p->mm) + mm_nr_pmds(p->mm) + mm_nr_puds(p->mm);
+		mm_pgtables_bytes(p->mm) / PAGE_SIZE;
 	task_unlock(p);
 
 	/*
@@ -368,15 +368,15 @@ static void select_bad_process(struct oom_control *oc)
  * Dumps the current memory state of all eligible tasks.  Tasks not in the same
  * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes
  * are not shown.
- * State information includes task's pid, uid, tgid, vm size, rss, nr_ptes,
- * swapents, oom_score_adj value, and name.
+ * State information includes task's pid, uid, tgid, vm size, rss,
+ * pgtables_bytes, swapents, oom_score_adj value, and name.
  */
 static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
 {
 	struct task_struct *p;
 	struct task_struct *task;
 
-	pr_info("[ pid ]   uid  tgid total_vm      rss nr_ptes nr_pmds nr_puds swapents oom_score_adj name\n");
+	pr_info("[ pid ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
 	rcu_read_lock();
 	for_each_process(p) {
 		if (oom_unkillable_task(p, memcg, nodemask))
@@ -392,12 +392,10 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
 			continue;
 		}
 
-		pr_info("[%5d] %5d %5d %8lu %8lu %7ld %7ld %7ld %8lu         %5hd %s\n",
+		pr_info("[%5d] %5d %5d %8lu %8lu %8ld %8lu         %5hd %s\n",
 			task->pid, from_kuid(&init_user_ns, task_uid(task)),
 			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
-			mm_nr_ptes(task->mm),
-			mm_nr_pmds(task->mm),
-			mm_nr_puds(task->mm),
+			mm_pgtables_bytes(task->mm),
 			get_mm_counter(task->mm, MM_SWAPENTS),
 			task->signal->oom_score_adj, task->comm);
 		task_unlock(task);
-- 
2.14.2

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes
  2017-10-05 10:14 [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes Kirill A. Shutemov
  2017-10-05 10:14 ` [PATCHv2 2/2] mm: Consolidate page table accounting Kirill A. Shutemov
@ 2017-10-06  4:02 ` Anshuman Khandual
  2017-10-06  9:55   ` Kirill A. Shutemov
  2017-10-06  8:50 ` Michal Hocko
  2017-10-06 23:10 ` Dave Hansen
  3 siblings, 1 reply; 9+ messages in thread
From: Anshuman Khandual @ 2017-10-06  4:02 UTC (permalink / raw)
  To: Kirill A. Shutemov, Andrew Morton
  Cc: linux-mm, linux-kernel, linux-api, Michal Hocko

On 10/05/2017 03:44 PM, Kirill A. Shutemov wrote:
> Let's add wrappers for ->nr_ptes with the same interface as for nr_pmds
> and nr_puds.
> 
> It's preparation for consolidation of page-table counters in mm_struct.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

Hey Kirill,

This patch does not apply cleanly either on mainline or on the latest
mmotm branch mmotm-2017-10-03-17-08. Is there any other branch like
'linux next' you might have rebased these patches against ?

- Anshuman

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes
  2017-10-05 10:14 [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes Kirill A. Shutemov
  2017-10-05 10:14 ` [PATCHv2 2/2] mm: Consolidate page table accounting Kirill A. Shutemov
  2017-10-06  4:02 ` [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes Anshuman Khandual
@ 2017-10-06  8:50 ` Michal Hocko
  2017-10-06  9:56   ` Kirill A. Shutemov
  2017-10-06 23:10 ` Dave Hansen
  3 siblings, 1 reply; 9+ messages in thread
From: Michal Hocko @ 2017-10-06  8:50 UTC (permalink / raw)
  To: Kirill A. Shutemov; +Cc: Andrew Morton, linux-mm, linux-kernel, linux-api

On Thu 05-10-17 13:14:41, Kirill A. Shutemov wrote:
> Let's add wrappers for ->nr_ptes with the same interface as for nr_pmds
> and nr_puds.
> 
> It's preparation for consolidation of page-table counters in mm_struct.

You are also making the accounting dependent on MMU which is OK because
no nommu arch really accounts page tables if there is anything like that
at all on those archs but it should be mentioned in the changelog.
 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

Acked-by: Michal Hocko <mhocko@suse.com>

> ---
>  arch/arm/mm/pgd.c           |  2 +-
>  arch/sparc/mm/hugetlbpage.c |  2 +-
>  arch/unicore32/mm/pgd.c     |  2 +-
>  fs/proc/task_mmu.c          |  2 +-
>  include/linux/mm.h          | 32 ++++++++++++++++++++++++++++++++
>  include/linux/mm_types.h    |  2 ++
>  kernel/fork.c               |  6 +++---
>  mm/debug.c                  |  2 +-
>  mm/huge_memory.c            | 10 +++++-----
>  mm/khugepaged.c             |  2 +-
>  mm/memory.c                 |  8 ++++----
>  mm/oom_kill.c               |  5 ++---
>  12 files changed, 54 insertions(+), 21 deletions(-)
> 
> diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
> index c1c1a5c67da1..61e281cb29fb 100644
> --- a/arch/arm/mm/pgd.c
> +++ b/arch/arm/mm/pgd.c
> @@ -141,7 +141,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd_base)
>  	pte = pmd_pgtable(*pmd);
>  	pmd_clear(pmd);
>  	pte_free(mm, pte);
> -	atomic_long_dec(&mm->nr_ptes);
> +	mm_dec_nr_ptes(mm);
>  no_pmd:
>  	pud_clear(pud);
>  	pmd_free(mm, pmd);
> diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
> index fd0d85808828..29fa5967b7d2 100644
> --- a/arch/sparc/mm/hugetlbpage.c
> +++ b/arch/sparc/mm/hugetlbpage.c
> @@ -396,7 +396,7 @@ static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
>  
>  	pmd_clear(pmd);
>  	pte_free_tlb(tlb, token, addr);
> -	atomic_long_dec(&tlb->mm->nr_ptes);
> +	mm_dec_nr_ptes(tlb->mm);
>  }
>  
>  static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
> diff --git a/arch/unicore32/mm/pgd.c b/arch/unicore32/mm/pgd.c
> index c572a28c76c9..a830a300aaa1 100644
> --- a/arch/unicore32/mm/pgd.c
> +++ b/arch/unicore32/mm/pgd.c
> @@ -97,7 +97,7 @@ void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd)
>  	pte = pmd_pgtable(*pmd);
>  	pmd_clear(pmd);
>  	pte_free(mm, pte);
> -	atomic_long_dec(&mm->nr_ptes);
> +	mm_dec_nr_ptes(mm);
>  	pmd_free(mm, pmd);
>  	mm_dec_nr_pmds(mm);
>  free:
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 627de66204bd..84c262d5197a 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -49,7 +49,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
>  	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
>  	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
>  	swap = get_mm_counter(mm, MM_SWAPENTS);
> -	ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes);
> +	ptes = PTRS_PER_PTE * sizeof(pte_t) * mm_nr_ptes(mm);
>  	pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
>  	puds = PTRS_PER_PUD * sizeof(pud_t) * mm_nr_puds(mm);
>  	seq_printf(m,
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 5125c51c9c35..e185dcdc5183 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1679,6 +1679,38 @@ static inline void mm_dec_nr_pmds(struct mm_struct *mm)
>  }
>  #endif
>  
> +#ifdef CONFIG_MMU
> +static inline void mm_nr_ptes_init(struct mm_struct *mm)
> +{
> +	atomic_long_set(&mm->nr_ptes, 0);
> +}
> +
> +static inline unsigned long mm_nr_ptes(const struct mm_struct *mm)
> +{
> +	return atomic_long_read(&mm->nr_ptes);
> +}
> +
> +static inline void mm_inc_nr_ptes(struct mm_struct *mm)
> +{
> +	atomic_long_inc(&mm->nr_ptes);
> +}
> +
> +static inline void mm_dec_nr_ptes(struct mm_struct *mm)
> +{
> +	atomic_long_dec(&mm->nr_ptes);
> +}
> +#else
> +static inline void mm_nr_ptes_init(struct mm_struct *mm) {}
> +
> +static inline unsigned long mm_nr_ptes(const struct mm_struct *mm)
> +{
> +	return 0;
> +}
> +
> +static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
> +static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
> +#endif
> +
>  int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
>  int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
>  
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 6c8c2bb9e5a1..95d0eefe1f4a 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -398,7 +398,9 @@ struct mm_struct {
>  	 */
>  	atomic_t mm_count;
>  
> +#ifdef CONFIG_MMU
>  	atomic_long_t nr_ptes;			/* PTE page table pages */
> +#endif
>  #if CONFIG_PGTABLE_LEVELS > 2
>  	atomic_long_t nr_pmds;			/* PMD page table pages */
>  #endif
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 5624918154db..d466181902cf 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -813,7 +813,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
>  	init_rwsem(&mm->mmap_sem);
>  	INIT_LIST_HEAD(&mm->mmlist);
>  	mm->core_state = NULL;
> -	atomic_long_set(&mm->nr_ptes, 0);
> +	mm_nr_ptes_init(mm);
>  	mm_nr_pmds_init(mm);
>  	mm_nr_puds_init(mm);
>  	mm->map_count = 0;
> @@ -869,9 +869,9 @@ static void check_mm(struct mm_struct *mm)
>  					  "mm:%p idx:%d val:%ld\n", mm, i, x);
>  	}
>  
> -	if (atomic_long_read(&mm->nr_ptes))
> +	if (mm_nr_ptes(mm))
>  		pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n",
> -				atomic_long_read(&mm->nr_ptes));
> +				mm_nr_ptes(mm));
>  	if (mm_nr_pmds(mm))
>  		pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n",
>  				mm_nr_pmds(mm));
> diff --git a/mm/debug.c b/mm/debug.c
> index afccb2565269..177326818d24 100644
> --- a/mm/debug.c
> +++ b/mm/debug.c
> @@ -135,7 +135,7 @@ void dump_mm(const struct mm_struct *mm)
>  		mm->mmap_base, mm->mmap_legacy_base, mm->highest_vm_end,
>  		mm->pgd, atomic_read(&mm->mm_users),
>  		atomic_read(&mm->mm_count),
> -		atomic_long_read((atomic_long_t *)&mm->nr_ptes),
> +		mm_nr_ptes(mm),
>  		mm_nr_pmds(mm),
>  		mm_nr_puds(mm),
>  		mm->map_count,
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 269b5df58543..c037d3d34950 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -606,7 +606,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
>  		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
>  		set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
>  		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
> -		atomic_long_inc(&vma->vm_mm->nr_ptes);
> +		mm_inc_nr_ptes(vma->vm_mm);
>  		spin_unlock(vmf->ptl);
>  		count_vm_event(THP_FAULT_ALLOC);
>  	}
> @@ -662,7 +662,7 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
>  	if (pgtable)
>  		pgtable_trans_huge_deposit(mm, pmd, pgtable);
>  	set_pmd_at(mm, haddr, pmd, entry);
> -	atomic_long_inc(&mm->nr_ptes);
> +	mm_inc_nr_ptes(mm);
>  	return true;
>  }
>  
> @@ -747,7 +747,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>  
>  	if (pgtable) {
>  		pgtable_trans_huge_deposit(mm, pmd, pgtable);
> -		atomic_long_inc(&mm->nr_ptes);
> +		mm_inc_nr_ptes(mm);
>  	}
>  
>  	set_pmd_at(mm, addr, pmd, entry);
> @@ -975,7 +975,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>  	get_page(src_page);
>  	page_dup_rmap(src_page, true);
>  	add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
> -	atomic_long_inc(&dst_mm->nr_ptes);
> +	mm_inc_nr_ptes(dst_mm);
>  	pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
>  
>  	pmdp_set_wrprotect(src_mm, addr, src_pmd);
> @@ -1675,7 +1675,7 @@ static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
>  
>  	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
>  	pte_free(mm, pgtable);
> -	atomic_long_dec(&mm->nr_ptes);
> +	mm_dec_nr_ptes(mm);
>  }
>  
>  int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index c01f177a1120..9e36fe8857d9 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1269,7 +1269,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
>  			_pmd = pmdp_collapse_flush(vma, addr, pmd);
>  			spin_unlock(ptl);
>  			up_write(&vma->vm_mm->mmap_sem);
> -			atomic_long_dec(&vma->vm_mm->nr_ptes);
> +			mm_dec_nr_ptes(vma->vm_mm);
>  			pte_free(vma->vm_mm, pmd_pgtable(_pmd));
>  		}
>  	}
> diff --git a/mm/memory.c b/mm/memory.c
> index 291d4984b417..c443456dbd02 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -438,7 +438,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
>  	pgtable_t token = pmd_pgtable(*pmd);
>  	pmd_clear(pmd);
>  	pte_free_tlb(tlb, token, addr);
> -	atomic_long_dec(&tlb->mm->nr_ptes);
> +	mm_dec_nr_ptes(tlb->mm);
>  }
>  
>  static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
> @@ -666,7 +666,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
>  
>  	ptl = pmd_lock(mm, pmd);
>  	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
> -		atomic_long_inc(&mm->nr_ptes);
> +		mm_inc_nr_ptes(mm);
>  		pmd_populate(mm, pmd, new);
>  		new = NULL;
>  	}
> @@ -3213,7 +3213,7 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
>  			goto map_pte;
>  		}
>  
> -		atomic_long_inc(&vma->vm_mm->nr_ptes);
> +		mm_inc_nr_ptes(vma->vm_mm);
>  		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
>  		spin_unlock(vmf->ptl);
>  		vmf->prealloc_pte = NULL;
> @@ -3272,7 +3272,7 @@ static void deposit_prealloc_pte(struct vm_fault *vmf)
>  	 * We are going to consume the prealloc table,
>  	 * count that as nr_ptes.
>  	 */
> -	atomic_long_inc(&vma->vm_mm->nr_ptes);
> +	mm_inc_nr_ptes(vma->vm_mm);
>  	vmf->prealloc_pte = NULL;
>  }
>  
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 4bee6968885d..851a0eec2624 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -200,8 +200,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
>  	 * task's rss, pagetable and swap space use.
>  	 */
>  	points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
> -		atomic_long_read(&p->mm->nr_ptes) + mm_nr_pmds(p->mm) +
> -		mm_nr_puds(p->mm);
> +		mm_nr_ptes(p->mm) + mm_nr_pmds(p->mm) + mm_nr_puds(p->mm);
>  	task_unlock(p);
>  
>  	/*
> @@ -396,7 +395,7 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
>  		pr_info("[%5d] %5d %5d %8lu %8lu %7ld %7ld %7ld %8lu         %5hd %s\n",
>  			task->pid, from_kuid(&init_user_ns, task_uid(task)),
>  			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
> -			atomic_long_read(&task->mm->nr_ptes),
> +			mm_nr_ptes(task->mm),
>  			mm_nr_pmds(task->mm),
>  			mm_nr_puds(task->mm),
>  			get_mm_counter(task->mm, MM_SWAPENTS),
> -- 
> 2.14.2

-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCHv2 2/2] mm: Consolidate page table accounting
  2017-10-05 10:14 ` [PATCHv2 2/2] mm: Consolidate page table accounting Kirill A. Shutemov
@ 2017-10-06  8:50   ` Michal Hocko
  0 siblings, 0 replies; 9+ messages in thread
From: Michal Hocko @ 2017-10-06  8:50 UTC (permalink / raw)
  To: Kirill A. Shutemov; +Cc: Andrew Morton, linux-mm, linux-kernel, linux-api

[CC linux-api because this is a user visible change]

On Thu 05-10-17 13:14:42, Kirill A. Shutemov wrote:
> Currently, we account page tables separately for each page table level,
> but that's redundant -- we only make use of total memory allocated to
> page tables for oom_badness calculation. We also provide the information
> to userspace, but it has dubious value there too.

I completely agree! The VmPMD has been added just in case without any
specific use in mind.
 
> This patch switches page table accounting to single counter.
> 
> mm->pgtables_bytes is now used to account all page table levels. We use
> bytes, because page table size for different levels of page table tree
> may be different.
> 
> The change has user-visible effect: we don't have VmPMD and VmPUD
> reported in /proc/[pid]/status. Not sure if anybody uses them.
> (As alternative, we can always report 0 kB for them.

I would go with removing the value rather than faking it. If somebody
really depends on it then we will have to revert this.

> OOM-killer report is also slightly changed: we now report pgtables_bytes
> instead of nr_ptes, nr_pmd, nr_puds.

This will actually make the parsing easier because the script doesn't
have to care about different page table sizes which we didn't handle in
oom_badness properly as well.

> The benefit is that we now calculate oom_badness() more correctly for
> machines which have different size of page tables depending on level
> or where page tables are less than a page in size.

Not only that. Another benefit is that we reduce the number of counters
and the API maintenance.

The only downside can be debugability because we do not know which page
table level could leak. But I do not remember many bugs that would be
caught by separate counters so I wouldn't lose sleep over this.
 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

Acked-by: Michal Hocko <mhocko@suse.com>

Thanks for doing this! One less item on my todo list ;)

> ---
>  Documentation/filesystems/proc.txt |  1 -
>  Documentation/sysctl/vm.txt        |  8 +++---
>  fs/proc/task_mmu.c                 | 11 ++------
>  include/linux/mm.h                 | 58 ++++++++------------------------------
>  include/linux/mm_types.h           |  8 +-----
>  kernel/fork.c                      | 16 +++--------
>  mm/debug.c                         |  7 ++---
>  mm/oom_kill.c                      | 14 ++++-----
>  8 files changed, 31 insertions(+), 92 deletions(-)
> 
> diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
> index adba21b5ada7..ec571b9bb18a 100644
> --- a/Documentation/filesystems/proc.txt
> +++ b/Documentation/filesystems/proc.txt
> @@ -250,7 +250,6 @@ Table 1-2: Contents of the status files (as of 4.8)
>   VmExe                       size of text segment
>   VmLib                       size of shared library code
>   VmPTE                       size of page table entries
> - VmPMD                       size of second level page tables
>   VmSwap                      amount of swap used by anonymous private data
>                               (shmem swap usage is not included)
>   HugetlbPages                size of hugetlb memory portions
> diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
> index 2717b6f2d706..2db0596d12f4 100644
> --- a/Documentation/sysctl/vm.txt
> +++ b/Documentation/sysctl/vm.txt
> @@ -622,10 +622,10 @@ oom_dump_tasks
>  
>  Enables a system-wide task dump (excluding kernel threads) to be produced
>  when the kernel performs an OOM-killing and includes such information as
> -pid, uid, tgid, vm size, rss, nr_ptes, nr_pmds, nr_puds, swapents,
> -oom_score_adj score, and name.  This is helpful to determine why the OOM
> -killer was invoked, to identify the rogue task that caused it, and to
> -determine why the OOM killer chose the task it did to kill.
> +pid, uid, tgid, vm size, rss, pgtables_bytes, swapents, oom_score_adj
> +score, and name.  This is helpful to determine why the OOM killer was
> +invoked, to identify the rogue task that caused it, and to determine why
> +the OOM killer chose the task it did to kill.
>  
>  If this is set to zero, this information is suppressed.  On very
>  large systems with thousands of tasks it may not be feasible to dump
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 84c262d5197a..c9c81373225d 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -25,7 +25,7 @@
>  
>  void task_mem(struct seq_file *m, struct mm_struct *mm)
>  {
> -	unsigned long text, lib, swap, ptes, pmds, puds, anon, file, shmem;
> +	unsigned long text, lib, swap, anon, file, shmem;
>  	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
>  
>  	anon = get_mm_counter(mm, MM_ANONPAGES);
> @@ -49,9 +49,6 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
>  	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
>  	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
>  	swap = get_mm_counter(mm, MM_SWAPENTS);
> -	ptes = PTRS_PER_PTE * sizeof(pte_t) * mm_nr_ptes(mm);
> -	pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
> -	puds = PTRS_PER_PUD * sizeof(pud_t) * mm_nr_puds(mm);
>  	seq_printf(m,
>  		"VmPeak:\t%8lu kB\n"
>  		"VmSize:\t%8lu kB\n"
> @@ -67,8 +64,6 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
>  		"VmExe:\t%8lu kB\n"
>  		"VmLib:\t%8lu kB\n"
>  		"VmPTE:\t%8lu kB\n"
> -		"VmPMD:\t%8lu kB\n"
> -		"VmPUD:\t%8lu kB\n"
>  		"VmSwap:\t%8lu kB\n",
>  		hiwater_vm << (PAGE_SHIFT-10),
>  		total_vm << (PAGE_SHIFT-10),
> @@ -81,9 +76,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
>  		shmem << (PAGE_SHIFT-10),
>  		mm->data_vm << (PAGE_SHIFT-10),
>  		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
> -		ptes >> 10,
> -		pmds >> 10,
> -		puds >> 10,
> +		mm_pgtables_bytes(mm) >> 10,
>  		swap << (PAGE_SHIFT-10));
>  	hugetlb_report_usage(m, mm);
>  }
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index e185dcdc5183..a7e50c464021 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1604,37 +1604,20 @@ static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d,
>  {
>  	return 0;
>  }
> -
> -static inline unsigned long mm_nr_puds(const struct mm_struct *mm)
> -{
> -	return 0;
> -}
> -
> -static inline void mm_nr_puds_init(struct mm_struct *mm) {}
>  static inline void mm_inc_nr_puds(struct mm_struct *mm) {}
>  static inline void mm_dec_nr_puds(struct mm_struct *mm) {}
>  
>  #else
>  int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address);
>  
> -static inline void mm_nr_puds_init(struct mm_struct *mm)
> -{
> -	atomic_long_set(&mm->nr_puds, 0);
> -}
> -
> -static inline unsigned long mm_nr_puds(const struct mm_struct *mm)
> -{
> -	return atomic_long_read(&mm->nr_puds);
> -}
> -
>  static inline void mm_inc_nr_puds(struct mm_struct *mm)
>  {
> -	atomic_long_inc(&mm->nr_puds);
> +	atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
>  }
>  
>  static inline void mm_dec_nr_puds(struct mm_struct *mm)
>  {
> -	atomic_long_dec(&mm->nr_puds);
> +	atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
>  }
>  #endif
>  
> @@ -1645,64 +1628,47 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
>  	return 0;
>  }
>  
> -static inline void mm_nr_pmds_init(struct mm_struct *mm) {}
> -
> -static inline unsigned long mm_nr_pmds(const struct mm_struct *mm)
> -{
> -	return 0;
> -}
> -
>  static inline void mm_inc_nr_pmds(struct mm_struct *mm) {}
>  static inline void mm_dec_nr_pmds(struct mm_struct *mm) {}
>  
>  #else
>  int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
>  
> -static inline void mm_nr_pmds_init(struct mm_struct *mm)
> -{
> -	atomic_long_set(&mm->nr_pmds, 0);
> -}
> -
> -static inline unsigned long mm_nr_pmds(const struct mm_struct *mm)
> -{
> -	return atomic_long_read(&mm->nr_pmds);
> -}
> -
>  static inline void mm_inc_nr_pmds(struct mm_struct *mm)
>  {
> -	atomic_long_inc(&mm->nr_pmds);
> +	atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
>  }
>  
>  static inline void mm_dec_nr_pmds(struct mm_struct *mm)
>  {
> -	atomic_long_dec(&mm->nr_pmds);
> +	atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
>  }
>  #endif
>  
>  #ifdef CONFIG_MMU
> -static inline void mm_nr_ptes_init(struct mm_struct *mm)
> +static inline void mm_pgtables_bytes_init(struct mm_struct *mm)
>  {
> -	atomic_long_set(&mm->nr_ptes, 0);
> +	atomic_long_set(&mm->pgtables_bytes, 0);
>  }
>  
> -static inline unsigned long mm_nr_ptes(const struct mm_struct *mm)
> +static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm)
>  {
> -	return atomic_long_read(&mm->nr_ptes);
> +	return atomic_long_read(&mm->pgtables_bytes);
>  }
>  
>  static inline void mm_inc_nr_ptes(struct mm_struct *mm)
>  {
> -	atomic_long_inc(&mm->nr_ptes);
> +	atomic_long_add(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes);
>  }
>  
>  static inline void mm_dec_nr_ptes(struct mm_struct *mm)
>  {
> -	atomic_long_dec(&mm->nr_ptes);
> +	atomic_long_sub(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes);
>  }
>  #else
> -static inline void mm_nr_ptes_init(struct mm_struct *mm) {}
>  
> -static inline unsigned long mm_nr_ptes(const struct mm_struct *mm)
> +static inline void mm_pgtables_bytes_init(struct mm_struct *mm) {}
> +static inline unsigned long mm_pgtables_bytes(struct mm_struct *mm)
>  {
>  	return 0;
>  }
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 95d0eefe1f4a..aadd23377fbb 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -399,13 +399,7 @@ struct mm_struct {
>  	atomic_t mm_count;
>  
>  #ifdef CONFIG_MMU
> -	atomic_long_t nr_ptes;			/* PTE page table pages */
> -#endif
> -#if CONFIG_PGTABLE_LEVELS > 2
> -	atomic_long_t nr_pmds;			/* PMD page table pages */
> -#endif
> -#if CONFIG_PGTABLE_LEVELS > 3
> -	atomic_long_t nr_puds;			/* PUD page table pages */
> +	atomic_long_t pgtables_bytes;		/* PTE page table pages */
>  #endif
>  	int map_count;				/* number of VMAs */
>  
> diff --git a/kernel/fork.c b/kernel/fork.c
> index d466181902cf..ad849ccdad9e 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -813,9 +813,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
>  	init_rwsem(&mm->mmap_sem);
>  	INIT_LIST_HEAD(&mm->mmlist);
>  	mm->core_state = NULL;
> -	mm_nr_ptes_init(mm);
> -	mm_nr_pmds_init(mm);
> -	mm_nr_puds_init(mm);
> +	mm_pgtables_bytes_init(mm);
>  	mm->map_count = 0;
>  	mm->locked_vm = 0;
>  	mm->pinned_vm = 0;
> @@ -869,15 +867,9 @@ static void check_mm(struct mm_struct *mm)
>  					  "mm:%p idx:%d val:%ld\n", mm, i, x);
>  	}
>  
> -	if (mm_nr_ptes(mm))
> -		pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n",
> -				mm_nr_ptes(mm));
> -	if (mm_nr_pmds(mm))
> -		pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n",
> -				mm_nr_pmds(mm));
> -	if (mm_nr_puds(mm))
> -		pr_alert("BUG: non-zero nr_puds on freeing mm: %ld\n",
> -				mm_nr_puds(mm));
> +	if (mm_pgtables_bytes(mm))
> +		pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n",
> +				mm_pgtables_bytes(mm));
>  
>  #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
>  	VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
> diff --git a/mm/debug.c b/mm/debug.c
> index 177326818d24..299248a7fe0d 100644
> --- a/mm/debug.c
> +++ b/mm/debug.c
> @@ -104,8 +104,7 @@ void dump_mm(const struct mm_struct *mm)
>  		"get_unmapped_area %p\n"
>  #endif
>  		"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
> -		"pgd %p mm_users %d mm_count %d\n"
> -		"nr_ptes %lu nr_pmds %lu nr_puds %lu map_count %d\n"
> +		"pgd %p mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
>  		"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
>  		"pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
>  		"start_code %lx end_code %lx start_data %lx end_data %lx\n"
> @@ -135,9 +134,7 @@ void dump_mm(const struct mm_struct *mm)
>  		mm->mmap_base, mm->mmap_legacy_base, mm->highest_vm_end,
>  		mm->pgd, atomic_read(&mm->mm_users),
>  		atomic_read(&mm->mm_count),
> -		mm_nr_ptes(mm),
> -		mm_nr_pmds(mm),
> -		mm_nr_puds(mm),
> +		mm_pgtables_bytes(mm),
>  		mm->map_count,
>  		mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
>  		mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm,
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 851a0eec2624..a48280e64be6 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -200,7 +200,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
>  	 * task's rss, pagetable and swap space use.
>  	 */
>  	points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
> -		mm_nr_ptes(p->mm) + mm_nr_pmds(p->mm) + mm_nr_puds(p->mm);
> +		mm_pgtables_bytes(p->mm) / PAGE_SIZE;
>  	task_unlock(p);
>  
>  	/*
> @@ -368,15 +368,15 @@ static void select_bad_process(struct oom_control *oc)
>   * Dumps the current memory state of all eligible tasks.  Tasks not in the same
>   * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes
>   * are not shown.
> - * State information includes task's pid, uid, tgid, vm size, rss, nr_ptes,
> - * swapents, oom_score_adj value, and name.
> + * State information includes task's pid, uid, tgid, vm size, rss,
> + * pgtables_bytes, swapents, oom_score_adj value, and name.
>   */
>  static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
>  {
>  	struct task_struct *p;
>  	struct task_struct *task;
>  
> -	pr_info("[ pid ]   uid  tgid total_vm      rss nr_ptes nr_pmds nr_puds swapents oom_score_adj name\n");
> +	pr_info("[ pid ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
>  	rcu_read_lock();
>  	for_each_process(p) {
>  		if (oom_unkillable_task(p, memcg, nodemask))
> @@ -392,12 +392,10 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
>  			continue;
>  		}
>  
> -		pr_info("[%5d] %5d %5d %8lu %8lu %7ld %7ld %7ld %8lu         %5hd %s\n",
> +		pr_info("[%5d] %5d %5d %8lu %8lu %8ld %8lu         %5hd %s\n",
>  			task->pid, from_kuid(&init_user_ns, task_uid(task)),
>  			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
> -			mm_nr_ptes(task->mm),
> -			mm_nr_pmds(task->mm),
> -			mm_nr_puds(task->mm),
> +			mm_pgtables_bytes(task->mm),
>  			get_mm_counter(task->mm, MM_SWAPENTS),
>  			task->signal->oom_score_adj, task->comm);
>  		task_unlock(task);
> -- 
> 2.14.2

-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes
  2017-10-06  4:02 ` [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes Anshuman Khandual
@ 2017-10-06  9:55   ` Kirill A. Shutemov
  0 siblings, 0 replies; 9+ messages in thread
From: Kirill A. Shutemov @ 2017-10-06  9:55 UTC (permalink / raw)
  To: Anshuman Khandual
  Cc: Kirill A. Shutemov, Andrew Morton, linux-mm, linux-kernel,
	linux-api, Michal Hocko

On Fri, Oct 06, 2017 at 09:32:03AM +0530, Anshuman Khandual wrote:
> On 10/05/2017 03:44 PM, Kirill A. Shutemov wrote:
> > Let's add wrappers for ->nr_ptes with the same interface as for nr_pmd
> > and nr_pud.
> > 
> > It's preparation for consolidation of page-table counters in mm_struct.
> > 
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> 
> Hey Kirill,
> 
> This patch does not apply cleanly either on mainline or on the latest
> mmotm branch mmotm-2017-10-03-17-08. Is there any other branch like
> 'linux next' you might have rebased these patches against ?

It's against mmots. There's a pud page tables accounting patch we depend
on.

-- 
 Kirill A. Shutemov

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes
  2017-10-06  8:50 ` Michal Hocko
@ 2017-10-06  9:56   ` Kirill A. Shutemov
  0 siblings, 0 replies; 9+ messages in thread
From: Kirill A. Shutemov @ 2017-10-06  9:56 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Kirill A. Shutemov, Andrew Morton, linux-mm, linux-kernel, linux-api

On Fri, Oct 06, 2017 at 10:50:38AM +0200, Michal Hocko wrote:
> On Thu 05-10-17 13:14:41, Kirill A. Shutemov wrote:
> > Let's add wrappers for ->nr_ptes with the same interface as for nr_pmd
> > and nr_pud.
> > 
> > It's preparation for consolidation of page-table counters in mm_struct.
> 
> You are also making the accounting dependent on MMU which is OK because
> no nommu arch really accounts page tables if there is anything like that
> at all on those archs but it should be mentioned in the changelog.

Okay, I'll update change log.

> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> 
> Acked-by: Michal Hocko <mhocko@suse.com>

Thanks.

-- 
 Kirill A. Shutemov

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes
  2017-10-05 10:14 [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes Kirill A. Shutemov
                   ` (2 preceding siblings ...)
  2017-10-06  8:50 ` Michal Hocko
@ 2017-10-06 23:10 ` Dave Hansen
  2017-10-08 12:54   ` Kirill A. Shutemov
  3 siblings, 1 reply; 9+ messages in thread
From: Dave Hansen @ 2017-10-06 23:10 UTC (permalink / raw)
  To: Kirill A. Shutemov, Andrew Morton
  Cc: linux-mm, linux-kernel, linux-api, Michal Hocko

On 10/05/2017 03:14 AM, Kirill A. Shutemov wrote:
> +++ b/arch/sparc/mm/hugetlbpage.c
> @@ -396,7 +396,7 @@ static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
>  
>  	pmd_clear(pmd);
>  	pte_free_tlb(tlb, token, addr);
> -	atomic_long_dec(&tlb->mm->nr_ptes);
> +	mm_dec_nr_ptes(tlb->mm);
>  }

If we're going to go replace all of these, I wonder if we should start
doing it more generically.

	mm_dec_nr_pgtable(PGTABLE_PTE, tlb->mm)

or even:

	mm_dec_nr_pgtable(PGTABLE_LEVEL1, tlb->mm)

Instead of having a separate batch of functions for each level.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes
  2017-10-06 23:10 ` Dave Hansen
@ 2017-10-08 12:54   ` Kirill A. Shutemov
  0 siblings, 0 replies; 9+ messages in thread
From: Kirill A. Shutemov @ 2017-10-08 12:54 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Kirill A. Shutemov, Andrew Morton, linux-mm, linux-kernel,
	linux-api, Michal Hocko

On Fri, Oct 06, 2017 at 04:10:31PM -0700, Dave Hansen wrote:
> On 10/05/2017 03:14 AM, Kirill A. Shutemov wrote:
> > +++ b/arch/sparc/mm/hugetlbpage.c
> > @@ -396,7 +396,7 @@ static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
> >  
> >  	pmd_clear(pmd);
> >  	pte_free_tlb(tlb, token, addr);
> > -	atomic_long_dec(&tlb->mm->nr_ptes);
> > +	mm_dec_nr_ptes(tlb->mm);
> >  }
> 
> If we're going to go replace all of these, I wonder if we should start
> doing it more generically.
> 
> 	mm_dec_nr_pgtable(PGTABLE_PTE, tlb->mm)
> 
> or even:
> 
> 	mm_dec_nr_pgtable(PGTABLE_LEVEL1, tlb->mm)
> 
> Instead of having a separate batch of functions for each level.

We don't have this kind of consolidation for any other page table related
helpers. Don't see a reason to start here.

This kind of change can be part of an overall page table primitives
redesign once/if we get there.

But feel free to send patches. :)

-- 
 Kirill A. Shutemov

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2017-10-08 12:54 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-05 10:14 [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes Kirill A. Shutemov
2017-10-05 10:14 ` [PATCHv2 2/2] mm: Consolidate page table accounting Kirill A. Shutemov
2017-10-06  8:50   ` Michal Hocko
2017-10-06  4:02 ` [PATCHv2 1/2] mm: Introduce wrappers to access mm->nr_ptes Anshuman Khandual
2017-10-06  9:55   ` Kirill A. Shutemov
2017-10-06  8:50 ` Michal Hocko
2017-10-06  9:56   ` Kirill A. Shutemov
2017-10-06 23:10 ` Dave Hansen
2017-10-08 12:54   ` Kirill A. Shutemov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).