linux-mm.kvack.org archive mirror
* [PATCH uprobe, thp 0/4] THP aware uprobe
@ 2019-05-29 21:20 Song Liu
  2019-05-29 21:20 ` [PATCH uprobe, thp 1/4] mm, thp: allow preallocate pgtable for split_huge_pmd_address() Song Liu
                   ` (3 more replies)
  0 siblings, 4 replies; 17+ messages in thread
From: Song Liu @ 2019-05-29 21:20 UTC (permalink / raw)
  To: linux-kernel, linux-mm
  Cc: namit, peterz, oleg, rostedt, mhiramat, matthew.wilcox,
	kirill.shutemov, kernel-team, william.kucharski, chad.mynhier,
	mike.kravetz, Song Liu

This set makes uprobe aware of THPs.

Currently, when a uprobe is attached to text that lives on a THP, the huge
page is split by FOLL_SPLIT. As a result, attaching a uprobe eliminates the
performance benefit of the THP.

With this set, instead of using FOLL_SPLIT, we only split the PMD for the
uprobe. After all uprobes within the THP are removed, the PTEs are regrouped
into a huge PMD.

Note that, with uprobes attached, the process runs with PTEs for the huge
page. The performance benefit of THP is recovered _after_ all uprobes on
the huge page are detached.
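
For reference, the intended lifecycle, condensed from the patches below
(anon_page is just a placeholder name for the anonymous page that carries
the breakpoint), looks roughly like:

	/* attach: keep the compound page, split only the PMD */
	prealloc_pgtable = pte_alloc_one(mm);
	split_huge_pmd_address(vma, vaddr, false, NULL, prealloc_pgtable);
	__replace_page(vma, vaddr, old_page, anon_page);   /* install swbp */

	/* detach: map the original page back once no uprobes remain,
	 * then try to restore the huge PMD (patch 4)
	 */
	__replace_page(vma, vaddr, anon_page, orig_page);
	if (PageTransCompound(orig_page))
		try_collapse_huge_pmd(mm, vma, vaddr);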

This set (plus a few small debug patches) is also available at

   https://github.com/liu-song-6/linux/tree/uprobe-thp

Song Liu (4):
  mm, thp: allow preallocate pgtable for split_huge_pmd_address()
  uprobe: use original page when all uprobes are removed
  uprobe: support huge page by only splitting the pmd
  uprobe: collapse THP pmd after removing all uprobes

 include/linux/huge_mm.h |  22 ++++++++-
 kernel/events/uprobes.c |  82 +++++++++++++++++++++++++------
 mm/huge_memory.c        | 104 ++++++++++++++++++++++++++++++++++++----
 mm/rmap.c               |   2 +-
 4 files changed, 183 insertions(+), 27 deletions(-)

--
2.17.1



* [PATCH uprobe, thp 1/4] mm, thp: allow preallocate pgtable for split_huge_pmd_address()
  2019-05-29 21:20 [PATCH uprobe, thp 0/4] THP aware uprobe Song Liu
@ 2019-05-29 21:20 ` Song Liu
  2019-05-30 11:10   ` Kirill A. Shutemov
  2019-05-29 21:20 ` [PATCH uprobe, thp 2/4] uprobe: use original page when all uprobes are removed Song Liu
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 17+ messages in thread
From: Song Liu @ 2019-05-29 21:20 UTC (permalink / raw)
  To: linux-kernel, linux-mm
  Cc: namit, peterz, oleg, rostedt, mhiramat, matthew.wilcox,
	kirill.shutemov, kernel-team, william.kucharski, chad.mynhier,
	mike.kravetz, Song Liu

Currently, __split_huge_pmd_locked() relies on a later page fault to handle
file-backed THP: the huge PMD is simply cleared, because splitting the PMD
into PTEs requires allocating a new pgtable.

This patch allows the caller of __split_huge_pmd_locked() and
split_huge_pmd_address() to preallocate the pgtable, so that refault is
not required.

This is useful when the caller of split_huge_pmd_address() wants the small
pages to be usable immediately, without going through a refault.
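
For illustration, a caller that wants the PTEs populated right away is
expected to do something like the sketch below (the real user is added in
patch 3 of this series, which falls back to FOLL_SPLIT if the allocation
fails):

	pgtable_t prealloc_pgtable = pte_alloc_one(mm);

	if (prealloc_pgtable)
		/* the PMD is split into PTEs backed by the deposited
		 * pgtable, so the small pages are usable without a refault
		 */
		split_huge_pmd_address(vma, vaddr, false, NULL,
				       prealloc_pgtable);
	else
		/* old behavior: clear the PMD and rely on refault */
		split_huge_pmd_address(vma, vaddr, false, NULL, NULL);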

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 include/linux/huge_mm.h |  5 +++--
 mm/huge_memory.c        | 33 +++++++++++++++++++++++----------
 mm/rmap.c               |  2 +-
 3 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7cd5c150c21d..2d8a40fd06e4 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -161,7 +161,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 
 
 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
-		bool freeze, struct page *page);
+		bool freeze, struct page *page, pgtable_t prealloc_pgtable);
 
 void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long address);
@@ -299,7 +299,8 @@ static inline void deferred_split_huge_page(struct page *page) {}
 static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long address, bool freeze, struct page *page) {}
 static inline void split_huge_pmd_address(struct vm_area_struct *vma,
-		unsigned long address, bool freeze, struct page *page) {}
+		unsigned long address, bool freeze, struct page *page,
+		pgtable_t prealloc_pgtable) {}
 
 #define split_huge_pud(__vma, __pmd, __address)	\
 	do { } while (0)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9f8bce9a6b32..dcb0e30213af 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2118,7 +2118,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
 }
 
 static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long haddr, bool freeze)
+		unsigned long haddr, bool freeze, pgtable_t prealloc_pgtable)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct page *page;
@@ -2133,10 +2133,15 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 	VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
 	VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd)
 				&& !pmd_devmap(*pmd));
+	/* only file backed vma need preallocate pgtable*/
+	VM_BUG_ON(vma_is_anonymous(vma) && prealloc_pgtable);
 
 	count_vm_event(THP_SPLIT_PMD);
 
-	if (!vma_is_anonymous(vma)) {
+	if (prealloc_pgtable) {
+		pgtable_trans_huge_deposit(mm, pmd, prealloc_pgtable);
+		mm_inc_nr_pmds(mm);
+	} else if (!vma_is_anonymous(vma)) {
 		_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
 		/*
 		 * We are going to unmap this huge page. So
@@ -2277,8 +2282,9 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 	}
 }
 
-void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long address, bool freeze, struct page *page)
+static void ____split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+		unsigned long address, bool freeze, struct page *page,
+		pgtable_t prealloc_pgtable)
 {
 	spinlock_t *ptl;
 	struct mmu_notifier_range range;
@@ -2303,7 +2309,8 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			clear_page_mlock(page);
 	} else if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd)))
 		goto out;
-	__split_huge_pmd_locked(vma, pmd, range.start, freeze);
+	__split_huge_pmd_locked(vma, pmd, range.start, freeze,
+				prealloc_pgtable);
 out:
 	spin_unlock(ptl);
 	/*
@@ -2322,8 +2329,14 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	mmu_notifier_invalidate_range_only_end(&range);
 }
 
+void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+		unsigned long address, bool freeze, struct page *page)
+{
+	____split_huge_pmd(vma, pmd, address, freeze, page, NULL);
+}
+
 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
-		bool freeze, struct page *page)
+		bool freeze, struct page *page, pgtable_t prealloc_pgtable)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
@@ -2344,7 +2357,7 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 
 	pmd = pmd_offset(pud, address);
 
-	__split_huge_pmd(vma, pmd, address, freeze, page);
+	____split_huge_pmd(vma, pmd, address, freeze, page, prealloc_pgtable);
 }
 
 void vma_adjust_trans_huge(struct vm_area_struct *vma,
@@ -2360,7 +2373,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 	if (start & ~HPAGE_PMD_MASK &&
 	    (start & HPAGE_PMD_MASK) >= vma->vm_start &&
 	    (start & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
-		split_huge_pmd_address(vma, start, false, NULL);
+		split_huge_pmd_address(vma, start, false, NULL, NULL);
 
 	/*
 	 * If the new end address isn't hpage aligned and it could
@@ -2370,7 +2383,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 	if (end & ~HPAGE_PMD_MASK &&
 	    (end & HPAGE_PMD_MASK) >= vma->vm_start &&
 	    (end & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
-		split_huge_pmd_address(vma, end, false, NULL);
+		split_huge_pmd_address(vma, end, false, NULL, NULL);
 
 	/*
 	 * If we're also updating the vma->vm_next->vm_start, if the new
@@ -2384,7 +2397,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 		if (nstart & ~HPAGE_PMD_MASK &&
 		    (nstart & HPAGE_PMD_MASK) >= next->vm_start &&
 		    (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end)
-			split_huge_pmd_address(next, nstart, false, NULL);
+			split_huge_pmd_address(next, nstart, false, NULL, NULL);
 	}
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
index e5dfe2ae6b0d..6970d732507c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1361,7 +1361,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 
 	if (flags & TTU_SPLIT_HUGE_PMD) {
 		split_huge_pmd_address(vma, address,
-				flags & TTU_SPLIT_FREEZE, page);
+				flags & TTU_SPLIT_FREEZE, page, NULL);
 	}
 
 	/*
-- 
2.17.1



* [PATCH uprobe, thp 2/4] uprobe: use original page when all uprobes are removed
  2019-05-29 21:20 [PATCH uprobe, thp 0/4] THP aware uprobe Song Liu
  2019-05-29 21:20 ` [PATCH uprobe, thp 1/4] mm, thp: allow preallocate pgtable for split_huge_pmd_address() Song Liu
@ 2019-05-29 21:20 ` Song Liu
  2019-05-30 11:17   ` Kirill A. Shutemov
  2019-05-29 21:20 ` [PATCH uprobe, thp 3/4] uprobe: support huge page by only splitting the pmd Song Liu
  2019-05-29 21:20 ` [PATCH uprobe, thp 4/4] uprobe: collapse THP pmd after removing all uprobes Song Liu
  3 siblings, 1 reply; 17+ messages in thread
From: Song Liu @ 2019-05-29 21:20 UTC (permalink / raw)
  To: linux-kernel, linux-mm
  Cc: namit, peterz, oleg, rostedt, mhiramat, matthew.wilcox,
	kirill.shutemov, kernel-team, william.kucharski, chad.mynhier,
	mike.kravetz, Song Liu

Currently, uprobe swaps the target page with an anonymous page in both
install_breakpoint() and remove_breakpoint(). As a result, when all uprobes
on a page are removed, the given mm still maps an anonymous page instead of
the original file-backed page.

This patch allows uprobe to restore the original page when possible, i.e.
when all uprobes on the page have been removed.
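
The core of the check is a byte compare against the page cache page at the
same file offset; condensed from the hunk below:

	index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
	orig_page = find_get_page(vma->vm_file->f_inode->i_mapping, index);
	if (orig_page && memcmp(page_address(orig_page),
				page_address(new_page), PAGE_SIZE) == 0) {
		/* no breakpoint left in this page: map orig_page back */
		put_page(new_page);
		new_page = orig_page;
	}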

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 kernel/events/uprobes.c | 43 ++++++++++++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 78f61bfc6b79..ba49da99d2a2 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -160,16 +160,19 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	int err;
 	struct mmu_notifier_range range;
 	struct mem_cgroup *memcg;
+	bool orig = new_page->mapping != NULL;  /* new_page == orig_page */
 
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
 				addr + PAGE_SIZE);
 
 	VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
 
-	err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
-			false);
-	if (err)
-		return err;
+	if (!orig) {
+		err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
+					    &memcg, false);
+		if (err)
+			return err;
+	}
 
 	/* For try_to_free_swap() and munlock_vma_page() below */
 	lock_page(old_page);
@@ -177,15 +180,22 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	mmu_notifier_invalidate_range_start(&range);
 	err = -EAGAIN;
 	if (!page_vma_mapped_walk(&pvmw)) {
-		mem_cgroup_cancel_charge(new_page, memcg, false);
+		if (!orig)
+			mem_cgroup_cancel_charge(new_page, memcg, false);
 		goto unlock;
 	}
 	VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
 
 	get_page(new_page);
-	page_add_new_anon_rmap(new_page, vma, addr, false);
-	mem_cgroup_commit_charge(new_page, memcg, false, false);
-	lru_cache_add_active_or_unevictable(new_page, vma);
+	if (orig) {
+		page_add_file_rmap(new_page, false);
+		inc_mm_counter(mm, mm_counter_file(new_page));
+		dec_mm_counter(mm, MM_ANONPAGES);
+	} else {
+		page_add_new_anon_rmap(new_page, vma, addr, false);
+		mem_cgroup_commit_charge(new_page, memcg, false, false);
+		lru_cache_add_active_or_unevictable(new_page, vma);
+	}
 
 	if (!PageAnon(old_page)) {
 		dec_mm_counter(mm, mm_counter_file(old_page));
@@ -461,9 +471,10 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 			unsigned long vaddr, uprobe_opcode_t opcode)
 {
 	struct uprobe *uprobe;
-	struct page *old_page, *new_page;
+	struct page *old_page, *new_page, *orig_page = NULL;
 	struct vm_area_struct *vma;
 	int ret, is_register, ref_ctr_updated = 0;
+	pgoff_t index;
 
 	is_register = is_swbp_insn(&opcode);
 	uprobe = container_of(auprobe, struct uprobe, arch);
@@ -501,6 +512,20 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	copy_highpage(new_page, old_page);
 	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
+	index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
+	orig_page = find_get_page(vma->vm_file->f_inode->i_mapping, index);
+	if (orig_page) {
+		if (memcmp(page_address(orig_page),
+			   page_address(new_page), PAGE_SIZE) == 0) {
+			/* if new_page matches orig_page, use orig_page */
+			put_page(new_page);
+			new_page = orig_page;
+		} else {
+			put_page(orig_page);
+			orig_page = NULL;
+		}
+	}
+
 	ret = __replace_page(vma, vaddr, old_page, new_page);
 	put_page(new_page);
 put_old:
-- 
2.17.1



* [PATCH uprobe, thp 3/4] uprobe: support huge page by only splitting the pmd
  2019-05-29 21:20 [PATCH uprobe, thp 0/4] THP aware uprobe Song Liu
  2019-05-29 21:20 ` [PATCH uprobe, thp 1/4] mm, thp: allow preallocate pgtable for split_huge_pmd_address() Song Liu
  2019-05-29 21:20 ` [PATCH uprobe, thp 2/4] uprobe: use original page when all uprobes are removed Song Liu
@ 2019-05-29 21:20 ` Song Liu
  2019-05-30 11:08   ` William Kucharski
  2019-05-30 12:14   ` Kirill A. Shutemov
  2019-05-29 21:20 ` [PATCH uprobe, thp 4/4] uprobe: collapse THP pmd after removing all uprobes Song Liu
  3 siblings, 2 replies; 17+ messages in thread
From: Song Liu @ 2019-05-29 21:20 UTC (permalink / raw)
  To: linux-kernel, linux-mm
  Cc: namit, peterz, oleg, rostedt, mhiramat, matthew.wilcox,
	kirill.shutemov, kernel-team, william.kucharski, chad.mynhier,
	mike.kravetz, Song Liu

Instead of splitting the compound page with FOLL_SPLIT, this patch allows
uprobe to split only the PMD for huge pages, leaving the compound page
intact.

A helper function, mm_address_trans_huge(mm, address), is introduced to
test whether the given address in mm is mapped by a transparent huge PMD.
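
The resulting flow in uprobe_write_opcode() is roughly the following
(condensed from the diff below):

	/* read the page without FOLL_SPLIT first */
	ret = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE,
				    &old_page, &vma, NULL);

	if (mm_address_trans_huge(mm, vaddr)) {
		prealloc_pgtable = pte_alloc_one(mm);
		if (prealloc_pgtable)
			/* keep the compound page, only split the PMD */
			split_huge_pmd_address(vma, vaddr, false, NULL,
					       prealloc_pgtable);
		else
			/* allocation failed: fall back to FOLL_SPLIT */
			foll_flags |= FOLL_SPLIT;
	}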

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 include/linux/huge_mm.h |  8 ++++++++
 kernel/events/uprobes.c | 38 ++++++++++++++++++++++++++++++++------
 mm/huge_memory.c        | 24 ++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 2d8a40fd06e4..4832d6580969 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -163,6 +163,8 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 		bool freeze, struct page *page, pgtable_t prealloc_pgtable);
 
+bool mm_address_trans_huge(struct mm_struct *mm, unsigned long address);
+
 void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long address);
 
@@ -302,6 +304,12 @@ static inline void split_huge_pmd_address(struct vm_area_struct *vma,
 		unsigned long address, bool freeze, struct page *page,
 		pgtable_t prealloc_pgtable) {}
 
+static inline bool mm_address_trans_huge(struct mm_struct *mm,
+					 unsigned long address)
+{
+	return false;
+}
+
 #define split_huge_pud(__vma, __pmd, __address)	\
 	do { } while (0)
 
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ba49da99d2a2..56eeccc2f7a2 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -26,6 +26,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/task_work.h>
 #include <linux/shmem_fs.h>
+#include <asm/pgalloc.h>
 
 #include <linux/uprobes.h>
 
@@ -153,7 +154,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct page_vma_mapped_walk pvmw = {
-		.page = old_page,
+		.page = compound_head(old_page),
 		.vma = vma,
 		.address = addr,
 	};
@@ -165,8 +166,6 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
 				addr + PAGE_SIZE);
 
-	VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
-
 	if (!orig) {
 		err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
 					    &memcg, false);
@@ -188,7 +187,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	get_page(new_page);
 	if (orig) {
-		page_add_file_rmap(new_page, false);
+		page_add_file_rmap(compound_head(new_page),
+				   PageTransHuge(compound_head(new_page)));
 		inc_mm_counter(mm, mm_counter_file(new_page));
 		dec_mm_counter(mm, MM_ANONPAGES);
 	} else {
@@ -207,7 +207,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	set_pte_at_notify(mm, addr, pvmw.pte,
 			mk_pte(new_page, vma->vm_page_prot));
 
-	page_remove_rmap(old_page, false);
+	page_remove_rmap(compound_head(old_page),
+			 PageTransHuge(compound_head(old_page)));
 	if (!page_mapped(old_page))
 		try_to_free_swap(old_page);
 	page_vma_mapped_walk_done(&pvmw);
@@ -475,17 +476,42 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	struct vm_area_struct *vma;
 	int ret, is_register, ref_ctr_updated = 0;
 	pgoff_t index;
+	pgtable_t prealloc_pgtable = NULL;
+	unsigned long foll_flags = FOLL_FORCE;
 
 	is_register = is_swbp_insn(&opcode);
 	uprobe = container_of(auprobe, struct uprobe, arch);
 
+	/* do not FOLL_SPLIT yet */
+	ret = get_user_pages_remote(NULL, mm, vaddr, 1,
+			foll_flags, &old_page, &vma, NULL);
+
+	if (ret <= 0)
+		return ret;
+
+	if (mm_address_trans_huge(mm, vaddr)) {
+		prealloc_pgtable = pte_alloc_one(mm);
+		if (likely(prealloc_pgtable)) {
+			split_huge_pmd_address(vma, vaddr, false, NULL,
+					       prealloc_pgtable);
+			goto verify;
+		} else {
+			/* fallback to FOLL_SPLIT */
+			foll_flags |= FOLL_SPLIT;
+			put_page(old_page);
+		}
+	} else {
+		goto verify;
+	}
+
 retry:
 	/* Read the page with vaddr into memory */
 	ret = get_user_pages_remote(NULL, mm, vaddr, 1,
-			FOLL_FORCE | FOLL_SPLIT, &old_page, &vma, NULL);
+			foll_flags, &old_page, &vma, NULL);
 	if (ret <= 0)
 		return ret;
 
+verify:
 	ret = verify_opcode(old_page, vaddr, &opcode);
 	if (ret <= 0)
 		goto put_old;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index dcb0e30213af..4714871353c0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2360,6 +2360,30 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 	____split_huge_pmd(vma, pmd, address, freeze, page, prealloc_pgtable);
 }
 
+bool mm_address_trans_huge(struct mm_struct *mm, unsigned long address)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		return false;
+
+	p4d = p4d_offset(pgd, address);
+	if (!p4d_present(*p4d))
+		return false;
+
+	pud = pud_offset(p4d, address);
+	if (!pud_present(*pud))
+		return false;
+
+	pmd = pmd_offset(pud, address);
+
+	return pmd_trans_huge(*pmd);
+}
+
 void vma_adjust_trans_huge(struct vm_area_struct *vma,
 			     unsigned long start,
 			     unsigned long end,
-- 
2.17.1



* [PATCH uprobe, thp 4/4] uprobe: collapse THP pmd after removing all uprobes
  2019-05-29 21:20 [PATCH uprobe, thp 0/4] THP aware uprobe Song Liu
                   ` (2 preceding siblings ...)
  2019-05-29 21:20 ` [PATCH uprobe, thp 3/4] uprobe: support huge page by only splitting the pmd Song Liu
@ 2019-05-29 21:20 ` Song Liu
  2019-05-30 12:20   ` Kirill A. Shutemov
  3 siblings, 1 reply; 17+ messages in thread
From: Song Liu @ 2019-05-29 21:20 UTC (permalink / raw)
  To: linux-kernel, linux-mm
  Cc: namit, peterz, oleg, rostedt, mhiramat, matthew.wilcox,
	kirill.shutemov, kernel-team, william.kucharski, chad.mynhier,
	mike.kravetz, Song Liu

After all uprobes are removed from the huge page (which is still mapped by
PTEs at this point), it is possible to collapse the PMD and benefit from THP
again. This patch does the collapse.
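
The collapse is only attempted from uprobe_write_opcode() when the original
compound page was mapped back; condensed from the uprobes.c hunk below:

	if (!ret && orig_page && PageTransCompound(orig_page))
		try_collapse_huge_pmd(mm, vma, vaddr);

try_collapse_huge_pmd() itself bails out unless every PTE in the PMD range
still points into one contiguous run of pages.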

An issue in an earlier version was reported by the kbuild test robot.

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
 include/linux/huge_mm.h |  9 ++++++++
 kernel/events/uprobes.c |  3 +++
 mm/huge_memory.c        | 47 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 4832d6580969..61f6d574d9b4 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -252,6 +252,10 @@ static inline bool thp_migration_supported(void)
 	return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
 }
 
+extern inline void try_collapse_huge_pmd(struct mm_struct *mm,
+					 struct vm_area_struct *vma,
+					 unsigned long vaddr);
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -377,6 +381,11 @@ static inline bool thp_migration_supported(void)
 {
 	return false;
 }
+
+static inline void try_collapse_huge_pmd(struct mm_struct *mm,
+					 struct vm_area_struct *vma,
+					 unsigned long vaddr) {}
+
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 56eeccc2f7a2..422617bdd5ff 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -564,6 +564,9 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	if (ret && is_register && ref_ctr_updated)
 		update_ref_ctr(uprobe, mm, -1);
 
+	if (!ret && orig_page && PageTransCompound(orig_page))
+		try_collapse_huge_pmd(mm, vma, vaddr);
+
 	return ret;
 }
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4714871353c0..e2edec3ffd43 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2923,6 +2923,53 @@ static struct shrinker deferred_split_shrinker = {
 	.flags = SHRINKER_NUMA_AWARE,
 };
 
+/**
+ * This function only checks whether all PTEs in this PMD point to
+ * contiguous pages; the caller should make sure at least one of these
+ * PTEs points to a huge page, e.g. PageTransCompound(one_page) != 0.
+ */
+void try_collapse_huge_pmd(struct mm_struct *mm,
+			   struct vm_area_struct *vma,
+			   unsigned long vaddr)
+{
+	struct mmu_notifier_range range;
+	unsigned long addr;
+	pmd_t *pmd, _pmd;
+	spinlock_t *ptl;
+	long long head;
+	int i;
+
+	pmd = mm_find_pmd(mm, vaddr);
+	if (!pmd)
+		return;
+
+	addr = vaddr & HPAGE_PMD_MASK;
+	head = pte_val(*pte_offset_map(pmd, addr));
+	ptl = pmd_lock(mm, pmd);
+	for (i = 0; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
+		pte_t *pte = pte_offset_map(pmd, addr);
+
+		if (pte_val(*pte) != head + i * PAGE_SIZE) {
+			spin_unlock(ptl);
+			return;
+		}
+	}
+
+	addr = vaddr & HPAGE_PMD_MASK;
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm,
+				addr, addr + HPAGE_PMD_SIZE);
+	mmu_notifier_invalidate_range_start(&range);
+
+	_pmd = pmdp_collapse_flush(vma, addr, pmd);
+	spin_unlock(ptl);
+	mmu_notifier_invalidate_range_end(&range);
+	mm_dec_nr_ptes(mm);
+	pte_free(mm, pmd_pgtable(_pmd));
+	add_mm_counter(mm,
+		       shmem_file(vma->vm_file) ? MM_SHMEMPAGES : MM_FILEPAGES,
+		       -HPAGE_PMD_NR);
+}
+
 #ifdef CONFIG_DEBUG_FS
 static int split_huge_pages_set(void *data, u64 val)
 {
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH uprobe, thp 3/4] uprobe: support huge page by only splitting the pmd
  2019-05-29 21:20 ` [PATCH uprobe, thp 3/4] uprobe: support huge page by only splitting the pmd Song Liu
@ 2019-05-30 11:08   ` William Kucharski
  2019-05-30 17:24     ` Song Liu
  2019-05-30 12:14   ` Kirill A. Shutemov
  1 sibling, 1 reply; 17+ messages in thread
From: William Kucharski @ 2019-05-30 11:08 UTC (permalink / raw)
  To: Song Liu
  Cc: LKML, Linux-MM, namit, Peter Zijlstra, oleg, Steven Rostedt,
	mhiramat, Matthew Wilcox, kirill.shutemov, kernel-team,
	Chad Mynhier, mike.kravetz


Is there any reason to worry about supporting PUD-sized uprobe pages if
CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD is defined? I would prefer
not to bake in the assumption that "huge" means PMD-sized any more than
it already is.

For example, if CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD is configured,
mm_address_trans_huge() should either make the call to pud_trans_huge()
if appropriate, or a VM_BUG_ON_PAGE should be added in case the routine
is ever called with one.
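
For illustration, the PUD-aware variant of the helper could look something
like the sketch below (just a sketch of the suggestion, not code from the
series):

	pud = pud_offset(p4d, address);
	if (!pud_present(*pud))
		return false;
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	/* either handle PUD-sized mappings explicitly here ... */
	if (pud_trans_huge(*pud))
		return true;
	/* ... or VM_BUG_ON() if they are never expected */
#endif
	pmd = pmd_offset(pud, address);
	return pmd_trans_huge(*pmd);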

Otherwise it looks pretty reasonable to me.

    -- Bill



* Re: [PATCH uprobe, thp 1/4] mm, thp: allow preallocate pgtable for split_huge_pmd_address()
  2019-05-29 21:20 ` [PATCH uprobe, thp 1/4] mm, thp: allow preallocate pgtable for split_huge_pmd_address() Song Liu
@ 2019-05-30 11:10   ` Kirill A. Shutemov
  2019-05-30 11:14     ` Kirill A. Shutemov
  0 siblings, 1 reply; 17+ messages in thread
From: Kirill A. Shutemov @ 2019-05-30 11:10 UTC (permalink / raw)
  To: Song Liu
  Cc: linux-kernel, linux-mm, namit, peterz, oleg, rostedt, mhiramat,
	matthew.wilcox, kirill.shutemov, kernel-team, william.kucharski,
	chad.mynhier, mike.kravetz

On Wed, May 29, 2019 at 02:20:46PM -0700, Song Liu wrote:
> @@ -2133,10 +2133,15 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>  	VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
>  	VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd)
>  				&& !pmd_devmap(*pmd));
> +	/* only file backed vma need preallocate pgtable*/
> +	VM_BUG_ON(vma_is_anonymous(vma) && prealloc_pgtable);
>  
>  	count_vm_event(THP_SPLIT_PMD);
>  
> -	if (!vma_is_anonymous(vma)) {
> +	if (prealloc_pgtable) {
> +		pgtable_trans_huge_deposit(mm, pmd, prealloc_pgtable);
> +		mm_inc_nr_pmds(mm);
> +	} else if (!vma_is_anonymous(vma)) {
>  		_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
>  		/*
>  		 * We are going to unmap this huge page. So

Nope. This is going to leak a page table for architectures where
arch_needs_pgtable_deposit() is true.

-- 
 Kirill A. Shutemov



* Re: [PATCH uprobe, thp 1/4] mm, thp: allow preallocate pgtable for split_huge_pmd_address()
  2019-05-30 11:10   ` Kirill A. Shutemov
@ 2019-05-30 11:14     ` Kirill A. Shutemov
  2019-05-30 17:23       ` Song Liu
  0 siblings, 1 reply; 17+ messages in thread
From: Kirill A. Shutemov @ 2019-05-30 11:14 UTC (permalink / raw)
  To: Song Liu
  Cc: linux-kernel, linux-mm, namit, peterz, oleg, rostedt, mhiramat,
	matthew.wilcox, kirill.shutemov, kernel-team, william.kucharski,
	chad.mynhier, mike.kravetz

On Thu, May 30, 2019 at 02:10:15PM +0300, Kirill A. Shutemov wrote:
> On Wed, May 29, 2019 at 02:20:46PM -0700, Song Liu wrote:
> > @@ -2133,10 +2133,15 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
> >  	VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
> >  	VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd)
> >  				&& !pmd_devmap(*pmd));
> > +	/* only file backed vma need preallocate pgtable*/
> > +	VM_BUG_ON(vma_is_anonymous(vma) && prealloc_pgtable);
> >  
> >  	count_vm_event(THP_SPLIT_PMD);
> >  
> > -	if (!vma_is_anonymous(vma)) {
> > +	if (prealloc_pgtable) {
> > +		pgtable_trans_huge_deposit(mm, pmd, prealloc_pgtable);
> > +		mm_inc_nr_pmds(mm);
> > +	} else if (!vma_is_anonymous(vma)) {
> >  		_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
> >  		/*
> >  		 * We are going to unmap this huge page. So
> 
> Nope. This is going to leak a page table for architectures where
> arch_needs_pgtable_deposit() is true.

And I don't think there's correct handling of the dirty bit.

And what about DAX? Will it blow up? I think so.

-- 
 Kirill A. Shutemov



* Re: [PATCH uprobe, thp 2/4] uprobe: use original page when all uprobes are removed
  2019-05-29 21:20 ` [PATCH uprobe, thp 2/4] uprobe: use original page when all uprobes are removed Song Liu
@ 2019-05-30 11:17   ` Kirill A. Shutemov
  2019-05-30 17:18     ` Song Liu
  0 siblings, 1 reply; 17+ messages in thread
From: Kirill A. Shutemov @ 2019-05-30 11:17 UTC (permalink / raw)
  To: Song Liu
  Cc: linux-kernel, linux-mm, namit, peterz, oleg, rostedt, mhiramat,
	matthew.wilcox, kirill.shutemov, kernel-team, william.kucharski,
	chad.mynhier, mike.kravetz

On Wed, May 29, 2019 at 02:20:47PM -0700, Song Liu wrote:
> @@ -501,6 +512,20 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
>  	copy_highpage(new_page, old_page);
>  	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
>  
> +	index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
> +	orig_page = find_get_page(vma->vm_file->f_inode->i_mapping, index);
> +	if (orig_page) {
> +		if (memcmp(page_address(orig_page),
> +			   page_address(new_page), PAGE_SIZE) == 0) {

Does it work for highmem?


-- 
 Kirill A. Shutemov



* Re: [PATCH uprobe, thp 3/4] uprobe: support huge page by only splitting the pmd
  2019-05-29 21:20 ` [PATCH uprobe, thp 3/4] uprobe: support huge page by only splitting the pmd Song Liu
  2019-05-30 11:08   ` William Kucharski
@ 2019-05-30 12:14   ` Kirill A. Shutemov
  2019-05-30 17:37     ` Song Liu
  1 sibling, 1 reply; 17+ messages in thread
From: Kirill A. Shutemov @ 2019-05-30 12:14 UTC (permalink / raw)
  To: Song Liu
  Cc: linux-kernel, linux-mm, namit, peterz, oleg, rostedt, mhiramat,
	matthew.wilcox, kirill.shutemov, kernel-team, william.kucharski,
	chad.mynhier, mike.kravetz

On Wed, May 29, 2019 at 02:20:48PM -0700, Song Liu wrote:
> Instead of splitting the compound page with FOLL_SPLIT, this patch allows
> uprobe to only split pmd for huge pages.
> 
> A helper function mm_address_trans_huge(mm, address) was introduced to
> test whether the address in mm is pointing to THP.

Maybe it would be cleaner to have FOLL_SPLIT_PMD which would strip
trans_huge PMD if any and then set pte using get_locked_pte()?

This way you'll not need any changes in split_huge_pmd() path. Clearing
PMD will be fine.
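
Purely as a sketch of the idea (FOLL_SPLIT_PMD does not exist yet, and the
exact plumbing in the gup code is left out):

	if (flags & FOLL_SPLIT_PMD) {
		/* strip the trans_huge PMD, keep the compound page */
		split_huge_pmd(vma, pmd, address);
		/* then set up the pte, e.g. via get_locked_pte(), and
		 * continue on the regular PTE path
		 */
	}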

-- 
 Kirill A. Shutemov



* Re: [PATCH uprobe, thp 4/4] uprobe: collapse THP pmd after removing all uprobes
  2019-05-29 21:20 ` [PATCH uprobe, thp 4/4] uprobe: collapse THP pmd after removing all uprobes Song Liu
@ 2019-05-30 12:20   ` Kirill A. Shutemov
  2019-05-30 17:26     ` Song Liu
  0 siblings, 1 reply; 17+ messages in thread
From: Kirill A. Shutemov @ 2019-05-30 12:20 UTC (permalink / raw)
  To: Song Liu
  Cc: linux-kernel, linux-mm, namit, peterz, oleg, rostedt, mhiramat,
	matthew.wilcox, kirill.shutemov, kernel-team, william.kucharski,
	chad.mynhier, mike.kravetz

On Wed, May 29, 2019 at 02:20:49PM -0700, Song Liu wrote:
> After all uprobes are removed from the huge page (with PTE pgtable), it
> is possible to collapse the pmd and benefit from THP again. This patch
> does the collapse.

I don't think it's the right way to go. We should defer it to khugepaged.
We need to teach khugepaged to deal with PTE-mapped compound page.
And uprobe should only kick khugepaged for a VMA. Maybe synchronously.
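
(Assuming khugepaged is taught to collapse PTE-mapped compound pages, the
uprobe side could then be as small as something like the following; the
call below is only a hypothetical way to kick khugepaged for the VMA:)

	if (!ret && orig_page && PageTransCompound(orig_page))
		/* hypothetical: let khugepaged pick up this VMA later */
		khugepaged_enter(vma, vma->vm_flags);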

-- 
 Kirill A. Shutemov



* Re: [PATCH uprobe, thp 2/4] uprobe: use original page when all uprobes are removed
  2019-05-30 11:17   ` Kirill A. Shutemov
@ 2019-05-30 17:18     ` Song Liu
  0 siblings, 0 replies; 17+ messages in thread
From: Song Liu @ 2019-05-30 17:18 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-kernel, linux-mm, namit, peterz, oleg, rostedt, mhiramat,
	matthew.wilcox, kirill.shutemov, Kernel Team, william.kucharski,
	chad.mynhier, mike.kravetz



> On May 30, 2019, at 4:17 AM, Kirill A. Shutemov <kirill@shutemov.name> wrote:
> 
> On Wed, May 29, 2019 at 02:20:47PM -0700, Song Liu wrote:
>> @@ -501,6 +512,20 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
>> 	copy_highpage(new_page, old_page);
>> 	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
>> 
>> +	index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
>> +	orig_page = find_get_page(vma->vm_file->f_inode->i_mapping, index);
>> +	if (orig_page) {
>> +		if (memcmp(page_address(orig_page),
>> +			   page_address(new_page), PAGE_SIZE) == 0) {
> 
> Does it work for highmem?

Good catch! I will fix it in v2. 

Thanks!
Song

> 
> 
> -- 
> Kirill A. Shutemov



* Re: [PATCH uprobe, thp 1/4] mm, thp: allow preallocate pgtable for split_huge_pmd_address()
  2019-05-30 11:14     ` Kirill A. Shutemov
@ 2019-05-30 17:23       ` Song Liu
  0 siblings, 0 replies; 17+ messages in thread
From: Song Liu @ 2019-05-30 17:23 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: linux-kernel, linux-mm, namit, peterz, oleg, rostedt, mhiramat,
	matthew.wilcox, kirill.shutemov, Kernel Team, william.kucharski,
	chad.mynhier, mike.kravetz



> On May 30, 2019, at 4:14 AM, Kirill A. Shutemov <kirill@shutemov.name> wrote:
> 
> On Thu, May 30, 2019 at 02:10:15PM +0300, Kirill A. Shutemov wrote:
>> On Wed, May 29, 2019 at 02:20:46PM -0700, Song Liu wrote:
>>> @@ -2133,10 +2133,15 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>>> 	VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
>>> 	VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd)
>>> 				&& !pmd_devmap(*pmd));
>>> +	/* only file backed vma need preallocate pgtable*/
>>> +	VM_BUG_ON(vma_is_anonymous(vma) && prealloc_pgtable);
>>> 
>>> 	count_vm_event(THP_SPLIT_PMD);
>>> 
>>> -	if (!vma_is_anonymous(vma)) {
>>> +	if (prealloc_pgtable) {
>>> +		pgtable_trans_huge_deposit(mm, pmd, prealloc_pgtable);
>>> +		mm_inc_nr_pmds(mm);
>>> +	} else if (!vma_is_anonymous(vma)) {
>>> 		_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
>>> 		/*
>>> 		 * We are going to unmap this huge page. So
>> 
>> Nope. This is going to leak a page table for architectures where
>> arch_needs_pgtable_deposit() is true.
> 
> And I don't think there's correct handling of the dirty bit.
> 
> And what about DAX? Will it blow up? I think so.
> 

Let me look into these cases. Thanks for the feedback!

Song

> -- 
> Kirill A. Shutemov



* Re: [PATCH uprobe, thp 3/4] uprobe: support huge page by only splitting the pmd
  2019-05-30 11:08   ` William Kucharski
@ 2019-05-30 17:24     ` Song Liu
  0 siblings, 0 replies; 17+ messages in thread
From: Song Liu @ 2019-05-30 17:24 UTC (permalink / raw)
  To: William Kucharski
  Cc: LKML, Linux-MM, namit, Peter Zijlstra, oleg, Steven Rostedt,
	mhiramat, Matthew Wilcox, kirill.shutemov, Kernel Team,
	Chad Mynhier, mike.kravetz



> On May 30, 2019, at 4:08 AM, William Kucharski <william.kucharski@oracle.com> wrote:
> 
> 
> Is there any reason to worry about supporting PUD-sized uprobe pages if
> CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD is defined? I would prefer
> not to bake in the assumption that "huge" means PMD-sized any more than
> it already is.
> 
> For example, if CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD is configured,
> mm_address_trans_huge() should either make the call to pud_trans_huge()
> if appropriate, or a VM_BUG_ON_PAGE should be added in case the routine
> is ever called with one.
> 
> Otherwise it looks pretty reasonable to me.
> 
>    -- Bill
> 

I will try that in v2. 

Thanks,
Song


* Re: [PATCH uprobe, thp 4/4] uprobe: collapse THP pmd after removing all uprobes
  2019-05-30 12:20   ` Kirill A. Shutemov
@ 2019-05-30 17:26     ` Song Liu
  2019-05-31  7:00       ` Kirill A. Shutemov
  0 siblings, 1 reply; 17+ messages in thread
From: Song Liu @ 2019-05-30 17:26 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: LKML, Linux-MM, namit, peterz, oleg, rostedt, mhiramat,
	matthew.wilcox, kirill.shutemov, Kernel Team, william.kucharski,
	chad.mynhier, mike.kravetz



> On May 30, 2019, at 5:20 AM, Kirill A. Shutemov <kirill@shutemov.name> wrote:
> 
> On Wed, May 29, 2019 at 02:20:49PM -0700, Song Liu wrote:
>> After all uprobes are removed from the huge page (with PTE pgtable), it
>> is possible to collapse the pmd and benefit from THP again. This patch
>> does the collapse.
> 
> I don't think it's the right way to go. We should defer it to khugepaged.
> We need to teach khugepaged to deal with PTE-mapped compound page.
> And uprobe should only kick khugepaged for a VMA. Maybe synchronously.
> 

I guess that would be the same logic, but run in khugepaged? It doesn't
have to be done synchronously. 

Let me try that

Thanks,
Song


> -- 
> Kirill A. Shutemov



* Re: [PATCH uprobe, thp 3/4] uprobe: support huge page by only splitting the pmd
  2019-05-30 12:14   ` Kirill A. Shutemov
@ 2019-05-30 17:37     ` Song Liu
  0 siblings, 0 replies; 17+ messages in thread
From: Song Liu @ 2019-05-30 17:37 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: LKML, Linux-MM, namit, peterz, oleg, rostedt, mhiramat,
	matthew.wilcox, kirill.shutemov, Kernel Team, william.kucharski,
	chad.mynhier, mike.kravetz



> On May 30, 2019, at 5:14 AM, Kirill A. Shutemov <kirill@shutemov.name> wrote:
> 
> On Wed, May 29, 2019 at 02:20:48PM -0700, Song Liu wrote:
>> Instead of splitting the compound page with FOLL_SPLIT, this patch allows
>> uprobe to only split pmd for huge pages.
>> 
>> A helper function mm_address_trans_huge(mm, address) was introduced to
>> test whether the address in mm is pointing to THP.
> 
> Maybe it would be cleaner to have FOLL_SPLIT_PMD which would strip
> trans_huge PMD if any and then set pte using get_locked_pte()?

FOLL_SPLIT_PMD sounds like a great idea! Let me try it. 

Thanks,
Song

> 
> This way you'll not need any changes in split_huge_pmd() path. Clearing
> PMD will be fine.
> 
> -- 
> Kirill A. Shutemov



* Re: [PATCH uprobe, thp 4/4] uprobe: collapse THP pmd after removing all uprobes
  2019-05-30 17:26     ` Song Liu
@ 2019-05-31  7:00       ` Kirill A. Shutemov
  0 siblings, 0 replies; 17+ messages in thread
From: Kirill A. Shutemov @ 2019-05-31  7:00 UTC (permalink / raw)
  To: Song Liu
  Cc: LKML, Linux-MM, namit, peterz, oleg, rostedt, mhiramat,
	matthew.wilcox, kirill.shutemov, Kernel Team, william.kucharski,
	chad.mynhier, mike.kravetz

On Thu, May 30, 2019 at 05:26:38PM +0000, Song Liu wrote:
> 
> 
> > On May 30, 2019, at 5:20 AM, Kirill A. Shutemov <kirill@shutemov.name> wrote:
> > 
> > On Wed, May 29, 2019 at 02:20:49PM -0700, Song Liu wrote:
> >> After all uprobes are removed from the huge page (with PTE pgtable), it
> >> is possible to collapse the pmd and benefit from THP again. This patch
> >> does the collapse.
> > 
> > I don't think it's the right way to go. We should defer it to khugepaged.
> > We need to teach khugepaged to deal with PTE-mapped compound page.
> > And uprobe should only kick khugepaged for a VMA. Maybe synchronously.
> > 
> 
> I guess that would be the same logic, but run in khugepaged? It doesn't
> have to be done synchronously. 

My idea was that since we have all required locking in place we can call
into khugepaged code that does the collapse, without waiting for it to
get to the VMA.

-- 
 Kirill A. Shutemov



Thread overview: 17+ messages
2019-05-29 21:20 [PATCH uprobe, thp 0/4] THP aware uprobe Song Liu
2019-05-29 21:20 ` [PATCH uprobe, thp 1/4] mm, thp: allow preallocate pgtable for split_huge_pmd_address() Song Liu
2019-05-30 11:10   ` Kirill A. Shutemov
2019-05-30 11:14     ` Kirill A. Shutemov
2019-05-30 17:23       ` Song Liu
2019-05-29 21:20 ` [PATCH uprobe, thp 2/4] uprobe: use original page when all uprobes are removed Song Liu
2019-05-30 11:17   ` Kirill A. Shutemov
2019-05-30 17:18     ` Song Liu
2019-05-29 21:20 ` [PATCH uprobe, thp 3/4] uprobe: support huge page by only splitting the pmd Song Liu
2019-05-30 11:08   ` William Kucharski
2019-05-30 17:24     ` Song Liu
2019-05-30 12:14   ` Kirill A. Shutemov
2019-05-30 17:37     ` Song Liu
2019-05-29 21:20 ` [PATCH uprobe, thp 4/4] uprobe: collapse THP pmd after removing all uprobes Song Liu
2019-05-30 12:20   ` Kirill A. Shutemov
2019-05-30 17:26     ` Song Liu
2019-05-31  7:00       ` Kirill A. Shutemov
