linux-fsdevel.vger.kernel.org archive mirror
* [PATCH 0/9] More THP fixes
@ 2020-10-26 18:31 Matthew Wilcox (Oracle)
  2020-10-26 18:31 ` [PATCH 1/9] mm: Support THPs in zero_user_segments Matthew Wilcox (Oracle)
                   ` (9 more replies)
  0 siblings, 10 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-26 18:31 UTC (permalink / raw)
  To: linux-mm; +Cc: Matthew Wilcox (Oracle), linux-fsdevel

I'm not sure there's a common thread to this set of THP patches, other
than that I think they're pretty uncontroversial.  Maybe I'm wrong.

Matthew Wilcox (Oracle) (8):
  mm: Support THPs in zero_user_segments
  mm/page-flags: Allow accessing PageError on tail pages
  mm: Return head pages from grab_cache_page_write_begin
  mm: Replace prep_transhuge_page with thp_prep
  mm/truncate: Make invalidate_inode_pages2_range work with THPs
  mm/truncate: Fix invalidate_complete_page2 for THPs
  mm/vmscan: Free non-shmem THPs without splitting them
  mm: Fix READ_ONLY_THP warning

Zi Yan (1):
  mm: Fix THP size assumption in mem_cgroup_split_huge_fixup

 include/linux/highmem.h    | 19 +++++++++---
 include/linux/huge_mm.h    |  7 +++--
 include/linux/page-flags.h |  3 +-
 include/linux/pagemap.h    |  4 +--
 mm/filemap.c               | 15 ++++++---
 mm/highmem.c               | 62 ++++++++++++++++++++++++++++++++++++--
 mm/huge_memory.c           | 12 +++++---
 mm/internal.h              |  1 +
 mm/khugepaged.c            | 12 ++------
 mm/memcontrol.c            |  2 +-
 mm/mempolicy.c             | 15 +++------
 mm/migrate.c               | 15 +++------
 mm/page-writeback.c        |  2 +-
 mm/shmem.c                 |  9 +++---
 mm/truncate.c              | 25 ++++++---------
 mm/vmscan.c                |  4 +--
 16 files changed, 132 insertions(+), 75 deletions(-)

-- 
2.28.0



* [PATCH 1/9] mm: Support THPs in zero_user_segments
  2020-10-26 18:31 [PATCH 0/9] More THP fixes Matthew Wilcox (Oracle)
@ 2020-10-26 18:31 ` Matthew Wilcox (Oracle)
  2020-10-26 18:31 ` [PATCH 2/9] mm/page-flags: Allow accessing PageError on tail pages Matthew Wilcox (Oracle)
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-26 18:31 UTC (permalink / raw)
  To: linux-mm; +Cc: Matthew Wilcox (Oracle), linux-fsdevel

We can only kmap() one subpage of a THP at a time, so loop over all
relevant subpages, skipping ones which don't need to be zeroed.  This is
too large to inline when THPs are enabled and we actually need highmem,
so put it in highmem.c.
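
As a rough usage sketch (not part of this patch), a caller holding the
head page of a THP could zero everything outside a written range in a
single call; the helper name below is hypothetical:

	/* Hypothetical caller: zero all bytes outside [from, to).  'page'
	 * is assumed to be the head page, so offsets may run up to
	 * page_size(page) rather than just PAGE_SIZE.
	 */
	static void myfs_zero_outside(struct page *page, unsigned from,
			unsigned to)
	{
		zero_user_segments(page, 0, from, to, page_size(page));
	}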

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/highmem.h | 19 ++++++++++---
 mm/highmem.c            | 62 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 75 insertions(+), 6 deletions(-)

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 14e6202ce47f..8e21fe82b3a3 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -284,13 +284,22 @@ static inline void clear_highpage(struct page *page)
 	kunmap_atomic(kaddr);
 }
 
+/*
+ * If we pass in a base or tail page, we can zero up to PAGE_SIZE.
+ * If we pass in a head page, we can zero up to the size of the compound page.
+ */
+#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
+		unsigned start2, unsigned end2);
+#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
 static inline void zero_user_segments(struct page *page,
-	unsigned start1, unsigned end1,
-	unsigned start2, unsigned end2)
+		unsigned start1, unsigned end1,
+		unsigned start2, unsigned end2)
 {
 	void *kaddr = kmap_atomic(page);
+	unsigned int i;
 
-	BUG_ON(end1 > PAGE_SIZE || end2 > PAGE_SIZE);
+	BUG_ON(end1 > page_size(page) || end2 > page_size(page));
 
 	if (end1 > start1)
 		memset(kaddr + start1, 0, end1 - start1);
@@ -299,8 +308,10 @@ static inline void zero_user_segments(struct page *page,
 		memset(kaddr + start2, 0, end2 - start2);
 
 	kunmap_atomic(kaddr);
-	flush_dcache_page(page);
+	for (i = 0; i < compound_nr(page); i++)
+		flush_dcache_page(page + i);
 }
+#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
 
 static inline void zero_user_segment(struct page *page,
 	unsigned start, unsigned end)
diff --git a/mm/highmem.c b/mm/highmem.c
index 1352a27951e3..9901a806617a 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -367,9 +367,67 @@ void kunmap_high(struct page *page)
 	if (need_wakeup)
 		wake_up(pkmap_map_wait);
 }
-
 EXPORT_SYMBOL(kunmap_high);
-#endif	/* CONFIG_HIGHMEM */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
+		unsigned start2, unsigned end2)
+{
+	unsigned int i;
+
+	BUG_ON(end1 > page_size(page) || end2 > page_size(page));
+
+	for (i = 0; i < compound_nr(page); i++) {
+		void *kaddr;
+		unsigned this_end;
+
+		if (end1 == 0 && start2 >= PAGE_SIZE) {
+			start2 -= PAGE_SIZE;
+			end2 -= PAGE_SIZE;
+			continue;
+		}
+
+		if (start1 >= PAGE_SIZE) {
+			start1 -= PAGE_SIZE;
+			end1 -= PAGE_SIZE;
+			if (start2) {
+				start2 -= PAGE_SIZE;
+				end2 -= PAGE_SIZE;
+			}
+			continue;
+		}
+
+		kaddr = kmap_atomic(page + i);
+
+		this_end = min_t(unsigned, end1, PAGE_SIZE);
+		if (end1 > start1)
+			memset(kaddr + start1, 0, this_end - start1);
+		end1 -= this_end;
+		start1 = 0;
+
+		if (start2 >= PAGE_SIZE) {
+			start2 -= PAGE_SIZE;
+			end2 -= PAGE_SIZE;
+		} else {
+			this_end = min_t(unsigned, end2, PAGE_SIZE);
+			if (end2 > start2)
+				memset(kaddr + start2, 0, this_end - start2);
+			end2 -= this_end;
+			start2 = 0;
+		}
+
+		kunmap_atomic(kaddr);
+		flush_dcache_page(page + i);
+
+		if (!end1 && !end2)
+			break;
+	}
+
+	BUG_ON((start1 | start2 | end1 | end2) != 0);
+}
+EXPORT_SYMBOL(zero_user_segments);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif /* CONFIG_HIGHMEM */
 
 #if defined(HASHED_PAGE_VIRTUAL)
 
-- 
2.28.0



* [PATCH 2/9] mm/page-flags: Allow accessing PageError on tail pages
  2020-10-26 18:31 [PATCH 0/9] More THP fixes Matthew Wilcox (Oracle)
  2020-10-26 18:31 ` [PATCH 1/9] mm: Support THPs in zero_user_segments Matthew Wilcox (Oracle)
@ 2020-10-26 18:31 ` Matthew Wilcox (Oracle)
  2020-10-26 18:31 ` [PATCH 3/9] mm: Return head pages from grab_cache_page_write_begin Matthew Wilcox (Oracle)
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-26 18:31 UTC (permalink / raw)
  To: linux-mm; +Cc: Matthew Wilcox (Oracle), linux-fsdevel

We track read errors for the entire THP on the head page, just as we
track Uptodate and Dirty.  This lets callers such as
generic_file_buffered_read() continue to work on the appropriate
subpage of the THP without modification.
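
For illustration only (not from this patch): with PF_HEAD, a flag test
on a tail page is redirected to the compound head, so a read path can
check the error state through whichever subpage it happens to hold:

	/* Sketch: 'subpage' may be a head or a tail page; PageError() now
	 * resolves to the head, which carries the error bit for the whole
	 * THP.  The helper name is made up. */
	static bool myfs_subpage_read_failed(struct page *subpage)
	{
		return PageError(subpage);
	}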

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/page-flags.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 4f6ba9379112..eb3a9796de8e 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -328,7 +328,8 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }
 
 __PAGEFLAG(Locked, locked, PF_NO_TAIL)
 PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
-PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL)
+PAGEFLAG(Error, error, PF_HEAD)
+	TESTCLEARFLAG(Error, error, PF_HEAD)
 PAGEFLAG(Referenced, referenced, PF_HEAD)
 	TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
 	__SETPAGEFLAG(Referenced, referenced, PF_HEAD)
-- 
2.28.0



* [PATCH 3/9] mm: Return head pages from grab_cache_page_write_begin
  2020-10-26 18:31 [PATCH 0/9] More THP fixes Matthew Wilcox (Oracle)
  2020-10-26 18:31 ` [PATCH 1/9] mm: Support THPs in zero_user_segments Matthew Wilcox (Oracle)
  2020-10-26 18:31 ` [PATCH 2/9] mm/page-flags: Allow accessing PageError on tail pages Matthew Wilcox (Oracle)
@ 2020-10-26 18:31 ` Matthew Wilcox (Oracle)
  2020-10-26 18:31 ` [PATCH 4/9] mm: Replace prep_transhuge_page with thp_prep Matthew Wilcox (Oracle)
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-26 18:31 UTC (permalink / raw)
  To: linux-mm; +Cc: Matthew Wilcox (Oracle), linux-fsdevel

This function is only called from filesystems on their own mapping,
so no caller will be surprised by getting back a head page when they
were expecting a tail page.  This lets us remove a call to thp_head()
in wait_for_stable_page().
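
A minimal sketch of a caller after this change (the helper and its name
are illustrative, not from this patch): the returned head page covers
more than PAGE_SIZE, so the in-page offset is computed against
thp_size() rather than PAGE_SIZE:

	/* Hypothetical helper: find or create the page covering 'pos' and
	 * report the offset of 'pos' within the (possibly compound) page. */
	static struct page *myfs_get_write_page(struct address_space *mapping,
			loff_t pos, unsigned *offset)
	{
		struct page *page;

		page = grab_cache_page_write_begin(mapping, pos >> PAGE_SHIFT, 0);
		if (page)
			*offset = pos & (thp_size(page) - 1);
		return page;
	}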

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c        | 12 ++++++++++--
 mm/page-writeback.c |  2 +-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 2214a2c48dd1..62bc6affeb70 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3327,15 +3327,23 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
 }
 EXPORT_SYMBOL(generic_file_direct_write);
 
-/*
+/**
+ * grab_cache_page_write_begin - Find or create a page for buffered writes.
+ * @mapping: The address space we're writing to.
+ * @index: The index we're writing to.
+ * @flags: %AOP_FLAG_NOFS to prevent memory reclaim calling the filesystem.
+ *
  * Find or create a page at the given pagecache position. Return the locked
  * page. This function is specifically for buffered writes.
+ *
+ * Return: The head page found in the cache, or NULL if no page could be
+ * created (due to lack of memory).
  */
 struct page *grab_cache_page_write_begin(struct address_space *mapping,
 					pgoff_t index, unsigned flags)
 {
 	struct page *page;
-	int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;
+	int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT|FGP_HEAD;
 
 	if (flags & AOP_FLAG_NOFS)
 		fgp_flags |= FGP_NOFS;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7709f0e223f5..3671568d433f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2849,7 +2849,7 @@ EXPORT_SYMBOL_GPL(wait_on_page_writeback);
  */
 void wait_for_stable_page(struct page *page)
 {
-	page = thp_head(page);
+	VM_BUG_ON_PGFLAGS(PageTail(page), page);
 	if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES)
 		wait_on_page_writeback(page);
 }
-- 
2.28.0



* [PATCH 4/9] mm: Replace prep_transhuge_page with thp_prep
  2020-10-26 18:31 [PATCH 0/9] More THP fixes Matthew Wilcox (Oracle)
                   ` (2 preceding siblings ...)
  2020-10-26 18:31 ` [PATCH 3/9] mm: Return head pages from grab_cache_page_write_begin Matthew Wilcox (Oracle)
@ 2020-10-26 18:31 ` Matthew Wilcox (Oracle)
  2020-10-26 18:31 ` [PATCH 5/9] mm/truncate: Make invalidate_inode_pages2_range work with THPs Matthew Wilcox (Oracle)
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-26 18:31 UTC (permalink / raw)
  To: linux-mm; +Cc: Matthew Wilcox (Oracle), linux-fsdevel, Kirill A . Shutemov

Since this is a THP function, move it into the thp_* function namespace.

By permitting NULL or order-0 pages as an argument, and returning the
argument, callers can write:

	return thp_prep(alloc_pages(...));

instead of assigning the result to a temporary variable and conditionally
passing that to prep_transhuge_page().  It'd be even nicer to have a
thp_alloc() function, but there are a lot of different ways that THPs
get allocated, and replicating the alloc_pages() family of APIs is a
bit too much verbosity.
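
As a sketch of the pattern this enables (the wrapper below is
hypothetical, not something this series adds):

	/* Hypothetical helper: allocate and prepare a PMD-sized THP on a
	 * given node in one expression.  thp_prep() tolerates a NULL return
	 * from the allocator, so no temporary variable is needed. */
	static inline struct page *thp_alloc_node(gfp_t gfp, int node)
	{
		return thp_prep(__alloc_pages_node(node, gfp | __GFP_COMP,
						   HPAGE_PMD_ORDER));
	}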

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 include/linux/huge_mm.h |  7 +++++--
 mm/huge_memory.c        | 12 ++++++++----
 mm/khugepaged.c         | 12 +++---------
 mm/mempolicy.c          | 15 ++++-----------
 mm/migrate.c            | 15 ++++-----------
 mm/shmem.c              |  9 ++++-----
 6 files changed, 28 insertions(+), 42 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 0365aa97f8e7..c2ecb6036ad8 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -183,7 +183,7 @@ extern unsigned long thp_get_unmapped_area(struct file *filp,
 		unsigned long addr, unsigned long len, unsigned long pgoff,
 		unsigned long flags);
 
-extern void prep_transhuge_page(struct page *page);
+extern struct page *thp_prep(struct page *page);
 extern void free_transhuge_page(struct page *page);
 bool is_transparent_hugepage(struct page *page);
 
@@ -375,7 +375,10 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
 	return false;
 }
 
-static inline void prep_transhuge_page(struct page *page) {}
+static inline struct page *thp_prep(struct page *page)
+{
+	return page;
+}
 
 static inline bool is_transparent_hugepage(struct page *page)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9474dbc150ed..4448b9cb4327 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -487,15 +487,20 @@ static inline struct deferred_split *get_deferred_split_queue(struct page *page)
 }
 #endif
 
-void prep_transhuge_page(struct page *page)
+struct page *thp_prep(struct page *page)
 {
+	if (!page || compound_order(page) == 0)
+		return page;
 	/*
-	 * we use page->mapping and page->indexlru in second tail page
+	 * we use page->mapping and page->index in second tail page
 	 * as list_head: assuming THP order >= 2
 	 */
+	BUG_ON(compound_order(page) == 1);
 
 	INIT_LIST_HEAD(page_deferred_list(page));
 	set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
+
+	return page;
 }
 
 bool is_transparent_hugepage(struct page *page)
@@ -745,12 +750,11 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		return ret;
 	}
 	gfp = alloc_hugepage_direct_gfpmask(vma);
-	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+	page = thp_prep(alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER));
 	if (unlikely(!page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
 	}
-	prep_transhuge_page(page);
 	return __do_huge_pmd_anonymous_page(vmf, page, gfp);
 }
 
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 4e3dff13eb70..3b09c7e4ae3a 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -867,14 +867,13 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
 {
 	VM_BUG_ON_PAGE(*hpage, *hpage);
 
-	*hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
+	*hpage = thp_prep(__alloc_pages_node(node, gfp, HPAGE_PMD_ORDER));
 	if (unlikely(!*hpage)) {
 		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
 		*hpage = ERR_PTR(-ENOMEM);
 		return NULL;
 	}
 
-	prep_transhuge_page(*hpage);
 	count_vm_event(THP_COLLAPSE_ALLOC);
 	return *hpage;
 }
@@ -886,13 +885,8 @@ static int khugepaged_find_target_node(void)
 
 static inline struct page *alloc_khugepaged_hugepage(void)
 {
-	struct page *page;
-
-	page = alloc_pages(alloc_hugepage_khugepaged_gfpmask(),
-			   HPAGE_PMD_ORDER);
-	if (page)
-		prep_transhuge_page(page);
-	return page;
+	return thp_prep(alloc_pages(alloc_hugepage_khugepaged_gfpmask(),
+			   HPAGE_PMD_ORDER));
 }
 
 static struct page *khugepaged_alloc_hugepage(bool *wait)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3fde772ef5ef..e97cee53c0b1 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1226,19 +1226,12 @@ static struct page *new_page(struct page *page, unsigned long start)
 		vma = vma->vm_next;
 	}
 
-	if (PageHuge(page)) {
+	if (PageHuge(page))
 		return alloc_huge_page_vma(page_hstate(compound_head(page)),
 				vma, address);
-	} else if (PageTransHuge(page)) {
-		struct page *thp;
-
-		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
-					 HPAGE_PMD_ORDER);
-		if (!thp)
-			return NULL;
-		prep_transhuge_page(thp);
-		return thp;
-	}
+	if (PageTransHuge(page))
+		return thp_prep(alloc_hugepage_vma(GFP_TRANSHUGE, vma,
+				address, thp_order(page)));
 	/*
 	 * if !vma, alloc_page_vma() will use task or system default policy
 	 */
diff --git a/mm/migrate.c b/mm/migrate.c
index 5ca5842df5db..262c91038c41 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1538,7 +1538,6 @@ struct page *alloc_migration_target(struct page *page, unsigned long private)
 	struct migration_target_control *mtc;
 	gfp_t gfp_mask;
 	unsigned int order = 0;
-	struct page *new_page = NULL;
 	int nid;
 	int zidx;
 
@@ -1568,12 +1567,8 @@ struct page *alloc_migration_target(struct page *page, unsigned long private)
 	if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE)
 		gfp_mask |= __GFP_HIGHMEM;
 
-	new_page = __alloc_pages_nodemask(gfp_mask, order, nid, mtc->nmask);
-
-	if (new_page && PageTransHuge(new_page))
-		prep_transhuge_page(new_page);
-
-	return new_page;
+	return thp_prep(__alloc_pages_nodemask(gfp_mask, order, nid,
+			mtc->nmask));
 }
 
 #ifdef CONFIG_NUMA
@@ -2134,12 +2129,10 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	int page_lru = page_is_file_lru(page);
 	unsigned long start = address & HPAGE_PMD_MASK;
 
-	new_page = alloc_pages_node(node,
-		(GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
-		HPAGE_PMD_ORDER);
+	new_page = thp_prep(alloc_pages_node(node,
+			GFP_TRANSHUGE_LIGHT | __GFP_THISNODE, HPAGE_PMD_ORDER));
 	if (!new_page)
 		goto out_fail;
-	prep_transhuge_page(new_page);
 
 	isolated = numamigrate_isolate_page(pgdat, page);
 	if (!isolated) {
diff --git a/mm/shmem.c b/mm/shmem.c
index 0cce132457f1..c10f8ecf85ce 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1486,12 +1486,11 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 		return NULL;
 
 	shmem_pseudo_vma_init(&pvma, info, hindex);
-	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+	gfp |= __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN;
+	page = thp_prep(alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0,
+			numa_node_id(), true));
 	shmem_pseudo_vma_destroy(&pvma);
-	if (page)
-		prep_transhuge_page(page);
-	else
+	if (!page)
 		count_vm_event(THP_FILE_FALLBACK);
 	return page;
 }
-- 
2.28.0



* [PATCH 5/9] mm/truncate: Make invalidate_inode_pages2_range work with THPs
  2020-10-26 18:31 [PATCH 0/9] More THP fixes Matthew Wilcox (Oracle)
                   ` (3 preceding siblings ...)
  2020-10-26 18:31 ` [PATCH 4/9] mm: Replace prep_transhuge_page with thp_prep Matthew Wilcox (Oracle)
@ 2020-10-26 18:31 ` Matthew Wilcox (Oracle)
  2020-10-26 18:31 ` [PATCH 6/9] mm/truncate: Fix invalidate_complete_page2 for THPs Matthew Wilcox (Oracle)
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-26 18:31 UTC (permalink / raw)
  To: linux-mm; +Cc: Matthew Wilcox (Oracle), linux-fsdevel

If we're going to unmap a THP, we have to be sure to unmap the entire
page, not just the part of it which lies after the search index.
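
The core of the fix, as a sketch (the helper name is made up; the page
is assumed to be locked and still attached to the mapping):

	/* Unmap every subpage of the THP, starting from the THP's own
	 * index rather than from the search index that found it. */
	static void myfs_unmap_whole_thp(struct address_space *mapping,
			struct page *page)
	{
		unmap_mapping_pages(mapping, page->index,
				thp_nr_pages(page), false);
	}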

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/truncate.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/mm/truncate.c b/mm/truncate.c
index 27cf411ae51f..30653b2717d3 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -652,6 +652,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 	while (find_get_entries(mapping, index, end, &pvec, indices)) {
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
+			pgoff_t count;
 
 			/* We rely upon deletion not changing page->index */
 			index = indices[i];
@@ -664,27 +665,22 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 			}
 
 			lock_page(page);
-			WARN_ON(page_to_index(page) != index);
 			if (page->mapping != mapping) {
 				unlock_page(page);
 				continue;
 			}
 			wait_on_page_writeback(page);
+			count = thp_nr_pages(page);
+			index = page->index + count - 1;
 			if (page_mapped(page)) {
 				if (!did_range_unmap) {
-					/*
-					 * Zap the rest of the file in one hit.
-					 */
-					unmap_mapping_pages(mapping, index,
-						(1 + end - index), false);
+					/* Zap the rest of the file */
+					count = max(count,
+							end - page->index + 1);
 					did_range_unmap = 1;
-				} else {
-					/*
-					 * Just zap this page
-					 */
-					unmap_mapping_pages(mapping, index,
-								1, false);
 				}
+				unmap_mapping_pages(mapping, page->index,
+						count, false);
 			}
 			BUG_ON(page_mapped(page));
 			ret2 = do_launder_page(mapping, page);
-- 
2.28.0



* [PATCH 6/9] mm/truncate: Fix invalidate_complete_page2 for THPs
  2020-10-26 18:31 [PATCH 0/9] More THP fixes Matthew Wilcox (Oracle)
                   ` (4 preceding siblings ...)
  2020-10-26 18:31 ` [PATCH 5/9] mm/truncate: Make invalidate_inode_pages2_range work with THPs Matthew Wilcox (Oracle)
@ 2020-10-26 18:31 ` Matthew Wilcox (Oracle)
  2020-10-26 18:31 ` [PATCH 7/9] mm/vmscan: Free non-shmem THPs without splitting them Matthew Wilcox (Oracle)
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-26 18:31 UTC (permalink / raw)
  To: linux-mm; +Cc: Matthew Wilcox (Oracle), linux-fsdevel

invalidate_complete_page2() currently open-codes page_cache_free_page(),
except for the part where it handles THPs.  Rather than duplicating the
THP handling there as well, call page_cache_free_page() from
invalidate_complete_page2().
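
For reference, page_cache_free_page() is roughly the following; this is
a simplified sketch of the existing helper in mm/filemap.c, not new code:

	/* Simplified sketch: call the filesystem's ->freepage() and drop
	 * the page cache's references, which for a file-backed THP means
	 * one reference per subpage. */
	static void page_cache_free_page_sketch(struct address_space *mapping,
			struct page *page)
	{
		if (mapping->a_ops->freepage)
			mapping->a_ops->freepage(page);

		if (PageTransHuge(page) && !PageHuge(page)) {
			page_ref_sub(page, thp_nr_pages(page));
			VM_BUG_ON_PAGE(page_count(page) <= 0, page);
		} else {
			put_page(page);
		}
	}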

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c  | 3 +--
 mm/internal.h | 1 +
 mm/truncate.c | 5 +----
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 62bc6affeb70..00de12d42bc4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -228,8 +228,7 @@ void __delete_from_page_cache(struct page *page, void *shadow)
 	page_cache_delete(mapping, page, shadow);
 }
 
-static void page_cache_free_page(struct address_space *mapping,
-				struct page *page)
+void page_cache_free_page(struct address_space *mapping, struct page *page)
 {
 	void (*freepage)(struct page *);
 
diff --git a/mm/internal.h b/mm/internal.h
index 5aca7d7bc57c..1391e3239547 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -623,4 +623,5 @@ struct migration_target_control {
 };
 
 bool truncate_inode_partial_page(struct page *page, loff_t start, loff_t end);
+void page_cache_free_page(struct address_space *mapping, struct page *page);
 #endif	/* __MM_INTERNAL_H */
diff --git a/mm/truncate.c b/mm/truncate.c
index 30653b2717d3..bed24857d1d2 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -603,10 +603,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	__delete_from_page_cache(page, NULL);
 	xa_unlock_irqrestore(&mapping->i_pages, flags);
 
-	if (mapping->a_ops->freepage)
-		mapping->a_ops->freepage(page);
-
-	put_page(page);	/* pagecache ref */
+	page_cache_free_page(mapping, page);
 	return 1;
 failed:
 	xa_unlock_irqrestore(&mapping->i_pages, flags);
-- 
2.28.0



* [PATCH 7/9] mm/vmscan: Free non-shmem THPs without splitting them
  2020-10-26 18:31 [PATCH 0/9] More THP fixes Matthew Wilcox (Oracle)
                   ` (5 preceding siblings ...)
  2020-10-26 18:31 ` [PATCH 6/9] mm/truncate: Fix invalidate_complete_page2 for THPs Matthew Wilcox (Oracle)
@ 2020-10-26 18:31 ` Matthew Wilcox (Oracle)
  2020-10-26 18:31 ` [PATCH 8/9] mm: Fix THP size assumption in mem_cgroup_split_huge_fixup Matthew Wilcox (Oracle)
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-26 18:31 UTC (permalink / raw)
  To: linux-mm; +Cc: Matthew Wilcox (Oracle), linux-fsdevel

We have to allocate memory in order to split a file-backed THP, so it's
not a good idea to split one while trying to reclaim memory.  Splitting
also doesn't work for XFS because its pages have an extra reference
count from page_has_private(), and split_huge_page() expects that
reference to have already been removed.  Unfortunately, we still have
to split shmem THPs because we can't handle swapping out an entire
THP yet.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/vmscan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1b8f0e059767..8e60ae2fabd1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1274,8 +1274,8 @@ static unsigned int shrink_page_list(struct list_head *page_list,
 				/* Adding to swap updated mapping */
 				mapping = page_mapping(page);
 			}
-		} else if (unlikely(PageTransHuge(page))) {
-			/* Split file THP */
+		} else if (PageSwapBacked(page) && PageTransHuge(page)) {
+			/* Split shmem THP */
 			if (split_huge_page_to_list(page, page_list))
 				goto keep_locked;
 		}
-- 
2.28.0



* [PATCH 8/9] mm: Fix THP size assumption in mem_cgroup_split_huge_fixup
  2020-10-26 18:31 [PATCH 0/9] More THP fixes Matthew Wilcox (Oracle)
                   ` (6 preceding siblings ...)
  2020-10-26 18:31 ` [PATCH 7/9] mm/vmscan: Free non-shmem THPs without splitting them Matthew Wilcox (Oracle)
@ 2020-10-26 18:31 ` Matthew Wilcox (Oracle)
  2020-10-26 18:31 ` [PATCH 9/9] mm: Fix READ_ONLY_THP warning Matthew Wilcox (Oracle)
  2020-10-27 14:36 ` [PATCH 0/9] More THP fixes Zi Yan
  9 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-26 18:31 UTC (permalink / raw)
  To: linux-mm; +Cc: Zi Yan, linux-fsdevel, Matthew Wilcox

From: Zi Yan <ziy@nvidia.com>

Ask the page what size it is instead of assuming PMD size.

Signed-off-by: Zi Yan <ziy@nvidia.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/memcontrol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3a24e3b619f5..e7824c4dab25 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3280,7 +3280,7 @@ void mem_cgroup_split_huge_fixup(struct page *head)
 	if (mem_cgroup_disabled())
 		return;
 
-	for (i = 1; i < HPAGE_PMD_NR; i++) {
+	for (i = 1; i < thp_nr_pages(head); i++) {
 		css_get(&memcg->css);
 		head[i].mem_cgroup = memcg;
 	}
-- 
2.28.0



* [PATCH 9/9] mm: Fix READ_ONLY_THP warning
  2020-10-26 18:31 [PATCH 0/9] More THP fixes Matthew Wilcox (Oracle)
                   ` (7 preceding siblings ...)
  2020-10-26 18:31 ` [PATCH 8/9] mm: Fix THP size assumption in mem_cgroup_split_huge_fixup Matthew Wilcox (Oracle)
@ 2020-10-26 18:31 ` Matthew Wilcox (Oracle)
  2020-10-27 14:36 ` [PATCH 0/9] More THP fixes Zi Yan
  9 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-26 18:31 UTC (permalink / raw)
  To: linux-mm; +Cc: Matthew Wilcox (Oracle), linux-fsdevel

These counters only exist if CONFIG_READ_ONLY_THP_FOR_FS is defined.
When it is not, we should warn only if the filesystem does not
natively support THPs.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/pagemap.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 86143d36d028..2c736f8ae324 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -146,7 +146,7 @@ static inline void filemap_nr_thps_inc(struct address_space *mapping)
 	if (!mapping_thp_support(mapping))
 		atomic_inc(&mapping->nr_thps);
 #else
-	WARN_ON_ONCE(1);
+	WARN_ON_ONCE(!mapping_thp_support(mapping));
 #endif
 }
 
@@ -156,7 +156,7 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping)
 	if (!mapping_thp_support(mapping))
 		atomic_dec(&mapping->nr_thps);
 #else
-	WARN_ON_ONCE(1);
+	WARN_ON_ONCE(!mapping_thp_support(mapping));
 #endif
 }
 
-- 
2.28.0



* Re: [PATCH 0/9] More THP fixes
  2020-10-26 18:31 [PATCH 0/9] More THP fixes Matthew Wilcox (Oracle)
                   ` (8 preceding siblings ...)
  2020-10-26 18:31 ` [PATCH 9/9] mm: Fix READ_ONLY_THP warning Matthew Wilcox (Oracle)
@ 2020-10-27 14:36 ` Zi Yan
  9 siblings, 0 replies; 11+ messages in thread
From: Zi Yan @ 2020-10-27 14:36 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-mm, linux-fsdevel


On 26 Oct 2020, at 14:31, Matthew Wilcox (Oracle) wrote:

> I'm not sure there's a common thread to this set of THP patches other
> than I think they're pretty uncontroversial.  Maybe I'm wrong.
>
> Matthew Wilcox (Oracle) (8):
>   mm: Support THPs in zero_user_segments
>   mm/page-flags: Allow accessing PageError on tail pages
>   mm: Return head pages from grab_cache_page_write_begin
>   mm: Replace prep_transhuge_page with thp_prep
>   mm/truncate: Make invalidate_inode_pages2_range work with THPs
>   mm/truncate: Fix invalidate_complete_page2 for THPs
>   mm/vmscan: Free non-shmem THPs without splitting them
>   mm: Fix READ_ONLY_THP warning

They look good to me. Thanks.

Reviewed-by: Zi Yan <ziy@nvidia.com>

—
Best Regards,
Yan Zi

