* [PATCH v2 00/11] Some cleanups for memory-failure
@ 2024-04-08 19:42 Matthew Wilcox (Oracle)
  2024-04-08 19:42 ` [PATCH v2 01/11] mm/memory-failure: Remove fsdax_pgoff argument from __add_to_kill Matthew Wilcox (Oracle)
                   ` (11 more replies)
  0 siblings, 12 replies; 48+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-04-08 19:42 UTC (permalink / raw)
  To: Miaohe Lin; +Cc: Matthew Wilcox (Oracle), linux-mm

A lot of folio conversions, plus some other simplifications.
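
To make the recurring pattern in these patches easier to follow, here is a
minimal hand-written sketch of the typical conversion being applied.  It is
illustrative only and not lifted from any single patch; do_something() is a
placeholder:

	/* Before: hpage must be tracked separately, and each PageFoo()
	 * test on a possible tail page hides a compound_head() call.
	 */
	struct page *hpage = compound_head(p);
	if (PageLRU(hpage) && !PageWriteback(hpage))
		do_something(hpage);

	/* After: look the folio up once with page_folio(); the
	 * folio_test_*() helpers then operate on the folio directly,
	 * with no further compound_head() calls.
	 */
	struct folio *folio = page_folio(p);
	if (folio_test_lru(folio) && !folio_test_writeback(folio))
		do_something(&folio->page);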

Matthew Wilcox (Oracle) (11):
  mm/memory-failure: Remove fsdax_pgoff argument from __add_to_kill
  mm/memory-failure: Pass addr to __add_to_kill()
  mm: Return the address from page_mapped_in_vma()
  mm: Make page_mapped_in_vma conditional on CONFIG_MEMORY_FAILURE
  mm/memory-failure: Convert shake_page() to shake_folio()
  mm: Convert hugetlb_page_mapping_lock_write to folio
  mm/memory-failure: Convert memory_failure() to use a folio
  mm/memory-failure: Convert hwpoison_user_mappings to take a folio
  mm/memory-failure: Add some folio conversions to unpoison_memory
  mm/memory-failure: Use folio functions throughout collect_procs()
  mm/memory-failure: Pass the folio to collect_procs_ksm()

 include/linux/hugetlb.h |   6 +-
 include/linux/ksm.h     |  14 +----
 include/linux/mm.h      |   1 -
 mm/hugetlb.c            |   6 +-
 mm/hwpoison-inject.c    |  11 ++--
 mm/internal.h           |   1 +
 mm/ksm.c                |   5 +-
 mm/memory-failure.c     | 130 ++++++++++++++++++++--------------------
 mm/migrate.c            |   2 +-
 mm/page_vma_mapped.c    |  16 +++--
 10 files changed, 94 insertions(+), 98 deletions(-)

-- 
2.43.0



^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH v2 01/11] mm/memory-failure: Remove fsdax_pgoff argument from __add_to_kill
  2024-04-08 19:42 [PATCH v2 00/11] Some cleanups for memory-failure Matthew Wilcox (Oracle)
@ 2024-04-08 19:42 ` Matthew Wilcox (Oracle)
  2024-04-10  9:09   ` Oscar Salvador
  2024-04-08 19:42 ` [PATCH v2 02/11] mm/memory-failure: Pass addr to __add_to_kill() Matthew Wilcox (Oracle)
                   ` (10 subsequent siblings)
  11 siblings, 1 reply; 48+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-04-08 19:42 UTC (permalink / raw)
  To: Miaohe Lin; +Cc: Matthew Wilcox (Oracle), linux-mm, Jane Chu, Dan Williams

Unify the KSM and DAX codepaths by calculating the addr in
add_to_kill_fsdax() instead of telling __add_to_kill() to calculate it.

Acked-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: Jane Chu <jane.chu@oracle.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/memory-failure.c | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index edd6e114462f..34b3c8255c9b 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -415,21 +415,13 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
  * not much we can do.	We just print a message and ignore otherwise.
  */
 
-#define FSDAX_INVALID_PGOFF ULONG_MAX
-
 /*
  * Schedule a process for later kill.
  * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
- *
- * Note: @fsdax_pgoff is used only when @p is a fsdax page and a
- * filesystem with a memory failure handler has claimed the
- * memory_failure event. In all other cases, page->index and
- * page->mapping are sufficient for mapping the page back to its
- * corresponding user virtual address.
  */
 static void __add_to_kill(struct task_struct *tsk, struct page *p,
 			  struct vm_area_struct *vma, struct list_head *to_kill,
-			  unsigned long ksm_addr, pgoff_t fsdax_pgoff)
+			  unsigned long addr)
 {
 	struct to_kill *tk;
 
@@ -439,12 +431,10 @@ static void __add_to_kill(struct task_struct *tsk, struct page *p,
 		return;
 	}
 
-	tk->addr = ksm_addr ? ksm_addr : page_address_in_vma(p, vma);
-	if (is_zone_device_page(p)) {
-		if (fsdax_pgoff != FSDAX_INVALID_PGOFF)
-			tk->addr = vma_address(vma, fsdax_pgoff, 1);
+	tk->addr = addr ? addr : page_address_in_vma(p, vma);
+	if (is_zone_device_page(p))
 		tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
-	} else
+	else
 		tk->size_shift = page_shift(compound_head(p));
 
 	/*
@@ -474,7 +464,7 @@ static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
 				  struct vm_area_struct *vma,
 				  struct list_head *to_kill)
 {
-	__add_to_kill(tsk, p, vma, to_kill, 0, FSDAX_INVALID_PGOFF);
+	__add_to_kill(tsk, p, vma, to_kill, 0);
 }
 
 #ifdef CONFIG_KSM
@@ -492,10 +482,10 @@ static bool task_in_to_kill_list(struct list_head *to_kill,
 }
 void add_to_kill_ksm(struct task_struct *tsk, struct page *p,
 		     struct vm_area_struct *vma, struct list_head *to_kill,
-		     unsigned long ksm_addr)
+		     unsigned long addr)
 {
 	if (!task_in_to_kill_list(to_kill, tsk))
-		__add_to_kill(tsk, p, vma, to_kill, ksm_addr, FSDAX_INVALID_PGOFF);
+		__add_to_kill(tsk, p, vma, to_kill, addr);
 }
 #endif
 /*
@@ -669,7 +659,8 @@ static void add_to_kill_fsdax(struct task_struct *tsk, struct page *p,
 			      struct vm_area_struct *vma,
 			      struct list_head *to_kill, pgoff_t pgoff)
 {
-	__add_to_kill(tsk, p, vma, to_kill, 0, pgoff);
+	unsigned long addr = vma_address(vma, pgoff, 1);
+	__add_to_kill(tsk, p, vma, to_kill, addr);
 }
 
 /*
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v2 02/11] mm/memory-failure: Pass addr to __add_to_kill()
  2024-04-08 19:42 [PATCH v2 00/11] Some cleanups for memory-failure Matthew Wilcox (Oracle)
  2024-04-08 19:42 ` [PATCH v2 01/11] mm/memory-failure: Remove fsdax_pgoff argument from __add_to_kill Matthew Wilcox (Oracle)
@ 2024-04-08 19:42 ` Matthew Wilcox (Oracle)
  2024-04-08 22:32   ` Jane Chu
  2024-04-10  9:11   ` Oscar Salvador
  2024-04-08 19:42 ` [PATCH v2 03/11] mm: Return the address from page_mapped_in_vma() Matthew Wilcox (Oracle)
                   ` (9 subsequent siblings)
  11 siblings, 2 replies; 48+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-04-08 19:42 UTC (permalink / raw)
  To: Miaohe Lin; +Cc: Matthew Wilcox (Oracle), linux-mm, Longlong Xia

Handle anon/file folios the same way as KSM & DAX folios by passing in
the address.

Cc: Longlong Xia <xialonglong1@huawei.com>
Acked-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/memory-failure.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 34b3c8255c9b..f94d85075ec1 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -431,7 +431,7 @@ static void __add_to_kill(struct task_struct *tsk, struct page *p,
 		return;
 	}
 
-	tk->addr = addr ? addr : page_address_in_vma(p, vma);
+	tk->addr = addr;
 	if (is_zone_device_page(p))
 		tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
 	else
@@ -464,7 +464,8 @@ static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
 				  struct vm_area_struct *vma,
 				  struct list_head *to_kill)
 {
-	__add_to_kill(tsk, p, vma, to_kill, 0);
+	unsigned long addr = page_address_in_vma(p, vma);
+	__add_to_kill(tsk, p, vma, to_kill, addr);
 }
 
 #ifdef CONFIG_KSM
@@ -480,6 +481,7 @@ static bool task_in_to_kill_list(struct list_head *to_kill,
 
 	return false;
 }
+
 void add_to_kill_ksm(struct task_struct *tsk, struct page *p,
 		     struct vm_area_struct *vma, struct list_head *to_kill,
 		     unsigned long addr)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v2 03/11] mm: Return the address from page_mapped_in_vma()
  2024-04-08 19:42 [PATCH v2 00/11] Some cleanups for memory-failure Matthew Wilcox (Oracle)
  2024-04-08 19:42 ` [PATCH v2 01/11] mm/memory-failure: Remove fsdax_pgoff argument from __add_to_kill Matthew Wilcox (Oracle)
  2024-04-08 19:42 ` [PATCH v2 02/11] mm/memory-failure: Pass addr to __add_to_kill() Matthew Wilcox (Oracle)
@ 2024-04-08 19:42 ` Matthew Wilcox (Oracle)
  2024-04-08 22:38   ` Jane Chu
  2024-04-10  9:38   ` Oscar Salvador
  2024-04-08 19:42 ` [PATCH v2 04/11] mm: Make page_mapped_in_vma conditional on CONFIG_MEMORY_FAILURE Matthew Wilcox (Oracle)
                   ` (8 subsequent siblings)
  11 siblings, 2 replies; 48+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-04-08 19:42 UTC (permalink / raw)
  To: Miaohe Lin; +Cc: Matthew Wilcox (Oracle), linux-mm

The only user of this function calls page_address_in_vma() immediately
after page_mapped_in_vma() calculates it and uses it to return true/false.
Return the address instead, allowing memory-failure to skip the call
to page_address_in_vma().
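
As a rough, hand-written sketch (not part of the diff below), the caller in
collect_procs_anon() can then consume the return value along these lines,
inside its VMA walk:

	unsigned long addr = page_mapped_in_vma(page, vma);

	if (addr == -EFAULT)	/* not mapped in this VMA */
		continue;
	/* addr feeds straight into the kill list; no second lookup via
	 * page_address_in_vma() is needed.
	 */
	__add_to_kill(tsk, page, vma, to_kill, addr);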

Acked-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/page_vma_mapped.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 53b8868ede61..48bfc17934cd 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -319,9 +319,10 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
  * @page: the page to test
  * @vma: the VMA to test
  *
- * Returns 1 if the page is mapped into the page tables of the VMA, 0
- * if the page is not mapped into the page tables of this VMA.  Only
- * valid for normal file or anonymous VMAs.
+ * Return: The address the page is mapped at if the page is in the range
+ * covered by the VMA and present in the page table.  If the page is
+ * outside the VMA or not present, returns -EFAULT.
+ * Only valid for normal file or anonymous VMAs.
  */
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
 {
@@ -336,9 +337,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
 
 	pvmw.address = vma_address(vma, pgoff, 1);
 	if (pvmw.address == -EFAULT)
-		return 0;
+	       goto out;
 	if (!page_vma_mapped_walk(&pvmw))
-		return 0;
+		return -EFAULT;
 	page_vma_mapped_walk_done(&pvmw);
-	return 1;
+out:
+	return pvmw.address;
 }
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v2 04/11] mm: Make page_mapped_in_vma conditional on CONFIG_MEMORY_FAILURE
  2024-04-08 19:42 [PATCH v2 00/11] Some cleanups for memory-failure Matthew Wilcox (Oracle)
                   ` (2 preceding siblings ...)
  2024-04-08 19:42 ` [PATCH v2 03/11] mm: Return the address from page_mapped_in_vma() Matthew Wilcox (Oracle)
@ 2024-04-08 19:42 ` Matthew Wilcox (Oracle)
  2024-04-08 22:45   ` Jane Chu
  2024-04-10  9:39   ` Oscar Salvador
  2024-04-08 19:42 ` [PATCH v2 05/11] mm/memory-failure: Convert shake_page() to shake_folio() Matthew Wilcox (Oracle)
                   ` (7 subsequent siblings)
  11 siblings, 2 replies; 48+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-04-08 19:42 UTC (permalink / raw)
  To: Miaohe Lin; +Cc: Matthew Wilcox (Oracle), linux-mm

This function is only currently used by the memory-failure code, so
we can omit it if we're not compiling in the memory-failure code.

Suggested-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/page_vma_mapped.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 48bfc17934cd..6e06267fc220 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -314,6 +314,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	return false;
 }
 
+#ifdef CONFIG_MEMORY_FAILURE
 /**
  * page_mapped_in_vma - check whether a page is really mapped in a VMA
  * @page: the page to test
@@ -344,3 +345,4 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
 out:
 	return pvmw.address;
 }
+#endif
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v2 05/11] mm/memory-failure: Convert shake_page() to shake_folio()
  2024-04-08 19:42 [PATCH v2 00/11] Some cleanups for memory-failure Matthew Wilcox (Oracle)
                   ` (3 preceding siblings ...)
  2024-04-08 19:42 ` [PATCH v2 04/11] mm: Make page_mapped_in_vma conditional on CONFIG_MEMORY_FAILURE Matthew Wilcox (Oracle)
@ 2024-04-08 19:42 ` Matthew Wilcox (Oracle)
  2024-04-08 22:53   ` Jane Chu
  2024-04-08 19:42 ` [PATCH v2 06/11] mm: Convert hugetlb_page_mapping_lock_write to folio Matthew Wilcox (Oracle)
                   ` (6 subsequent siblings)
  11 siblings, 1 reply; 48+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-04-08 19:42 UTC (permalink / raw)
  To: Miaohe Lin; +Cc: Matthew Wilcox (Oracle), linux-mm

Removes two calls to compound_head().  Move the prototype to
internal.h; we definitely don't want code outside mm using it.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/mm.h   |  1 -
 mm/hwpoison-inject.c | 11 ++++++-----
 mm/internal.h        |  1 +
 mm/memory-failure.c  | 15 ++++++++++-----
 4 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index b9173e230804..19a67f45907b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4011,7 +4011,6 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
 extern int memory_failure(unsigned long pfn, int flags);
 extern void memory_failure_queue_kick(int cpu);
 extern int unpoison_memory(unsigned long pfn);
-extern void shake_page(struct page *p);
 extern atomic_long_t num_poisoned_pages __read_mostly;
 extern int soft_offline_page(unsigned long pfn, int flags);
 #ifdef CONFIG_MEMORY_FAILURE
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index d0548e382b6b..c9d653f51e45 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -15,7 +15,7 @@ static int hwpoison_inject(void *data, u64 val)
 {
 	unsigned long pfn = val;
 	struct page *p;
-	struct page *hpage;
+	struct folio *folio;
 	int err;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -25,16 +25,17 @@ static int hwpoison_inject(void *data, u64 val)
 		return -ENXIO;
 
 	p = pfn_to_page(pfn);
-	hpage = compound_head(p);
+	folio = page_folio(p);
 
 	if (!hwpoison_filter_enable)
 		goto inject;
 
-	shake_page(hpage);
+	shake_folio(folio);
 	/*
 	 * This implies unable to support non-LRU pages except free page.
 	 */
-	if (!PageLRU(hpage) && !PageHuge(p) && !is_free_buddy_page(p))
+	if (!folio_test_lru(folio) && !folio_test_hugetlb(folio) &&
+	    !is_free_buddy_page(p))
 		return 0;
 
 	/*
@@ -42,7 +43,7 @@ static int hwpoison_inject(void *data, u64 val)
 	 * the targeted owner (or on a free page).
 	 * memory_failure() will redo the check reliably inside page lock.
 	 */
-	err = hwpoison_filter(hpage);
+	err = hwpoison_filter(&folio->page);
 	if (err)
 		return 0;
 
diff --git a/mm/internal.h b/mm/internal.h
index 57c1055d5568..14cdc9ccb582 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1010,6 +1010,7 @@ static inline int find_next_best_node(int node, nodemask_t *used_node_mask)
 /*
  * mm/memory-failure.c
  */
+void shake_folio(struct folio *folio);
 extern int hwpoison_filter(struct page *p);
 
 extern u32 hwpoison_filter_dev_major;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index f94d85075ec1..2e64e132bba1 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -357,20 +357,25 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
  * Unknown page type encountered. Try to check whether it can turn PageLRU by
  * lru_add_drain_all.
  */
-void shake_page(struct page *p)
+void shake_folio(struct folio *folio)
 {
-	if (PageHuge(p))
+	if (folio_test_hugetlb(folio))
 		return;
 	/*
 	 * TODO: Could shrink slab caches here if a lightweight range-based
 	 * shrinker will be available.
 	 */
-	if (PageSlab(p))
+	if (folio_test_slab(folio))
 		return;
 
 	lru_add_drain_all();
 }
-EXPORT_SYMBOL_GPL(shake_page);
+EXPORT_SYMBOL_GPL(shake_folio);
+
+static void shake_page(struct page *page)
+{
+	shake_folio(page_folio(page));
+}
 
 static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
 		unsigned long address)
@@ -1623,7 +1628,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * shake_page() again to ensure that it's flushed.
 	 */
 	if (mlocked)
-		shake_page(hpage);
+		shake_folio(folio);
 
 	/*
 	 * Now that the dirty bit has been propagated to the
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v2 06/11] mm: Convert hugetlb_page_mapping_lock_write to folio
  2024-04-08 19:42 [PATCH v2 00/11] Some cleanups for memory-failure Matthew Wilcox (Oracle)
                   ` (4 preceding siblings ...)
  2024-04-08 19:42 ` [PATCH v2 05/11] mm/memory-failure: Convert shake_page() to shake_folio() Matthew Wilcox (Oracle)
@ 2024-04-08 19:42 ` Matthew Wilcox (Oracle)
  2024-04-08 23:09   ` Jane Chu
  2024-04-10  9:52   ` Oscar Salvador
  2024-04-08 19:42 ` [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio Matthew Wilcox (Oracle)
                   ` (5 subsequent siblings)
  11 siblings, 2 replies; 48+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-04-08 19:42 UTC (permalink / raw)
  To: Miaohe Lin; +Cc: Matthew Wilcox (Oracle), linux-mm

The page is only used to get the mapping, so the folio will do just
as well.  Both callers already have a folio available, so this saves
a call to compound_head().

Acked-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/hugetlb.h | 6 +++---
 mm/hugetlb.c            | 6 +++---
 mm/memory-failure.c     | 2 +-
 mm/migrate.c            | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3f3e62880279..bebf4c3a53ef 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -178,7 +178,7 @@ bool hugetlbfs_pagecache_present(struct hstate *h,
 				 struct vm_area_struct *vma,
 				 unsigned long address);
 
-struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage);
+struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio);
 
 extern int sysctl_hugetlb_shm_group;
 extern struct list_head huge_boot_pages[MAX_NUMNODES];
@@ -297,8 +297,8 @@ static inline unsigned long hugetlb_total_pages(void)
 	return 0;
 }
 
-static inline struct address_space *hugetlb_page_mapping_lock_write(
-							struct page *hpage)
+static inline struct address_space *hugetlb_folio_mapping_lock_write(
+							struct folio *folio)
 {
 	return NULL;
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 456c81fbf8f5..707c85303e88 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2155,13 +2155,13 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
 /*
  * Find and lock address space (mapping) in write mode.
  *
- * Upon entry, the page is locked which means that page_mapping() is
+ * Upon entry, the folio is locked which means that folio_mapping() is
  * stable.  Due to locking order, we can only trylock_write.  If we can
  * not get the lock, simply return NULL to caller.
  */
-struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage)
+struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio)
 {
-	struct address_space *mapping = page_mapping(hpage);
+	struct address_space *mapping = folio_mapping(folio);
 
 	if (!mapping)
 		return mapping;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 2e64e132bba1..0a45fb7fb055 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1608,7 +1608,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 		 * TTU_RMAP_LOCKED to indicate we have taken the lock
 		 * at this higher level.
 		 */
-		mapping = hugetlb_page_mapping_lock_write(hpage);
+		mapping = hugetlb_folio_mapping_lock_write(folio);
 		if (mapping) {
 			try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
 			i_mmap_unlock_write(mapping);
diff --git a/mm/migrate.c b/mm/migrate.c
index 285072bca29c..f8da9b89e043 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1425,7 +1425,7 @@ static int unmap_and_move_huge_page(new_folio_t get_new_folio,
 			 * semaphore in write mode here and set TTU_RMAP_LOCKED
 			 * to let lower levels know we have taken the lock.
 			 */
-			mapping = hugetlb_page_mapping_lock_write(&src->page);
+			mapping = hugetlb_folio_mapping_lock_write(src);
 			if (unlikely(!mapping))
 				goto unlock_put_anon;
 
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio
  2024-04-08 19:42 [PATCH v2 00/11] Some cleanups for memory-failure Matthew Wilcox (Oracle)
                   ` (5 preceding siblings ...)
  2024-04-08 19:42 ` [PATCH v2 06/11] mm: Convert hugetlb_page_mapping_lock_write to folio Matthew Wilcox (Oracle)
@ 2024-04-08 19:42 ` Matthew Wilcox (Oracle)
  2024-04-09  0:34   ` Jane Chu
  2024-04-08 19:42 ` [PATCH v2 08/11] mm/memory-failure: Convert hwpoison_user_mappings to take " Matthew Wilcox (Oracle)
                   ` (4 subsequent siblings)
  11 siblings, 1 reply; 48+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-04-08 19:42 UTC (permalink / raw)
  To: Miaohe Lin; +Cc: Matthew Wilcox (Oracle), linux-mm

Saves dozens of calls to compound_head().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/memory-failure.c | 40 +++++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 0a45fb7fb055..1c7c73776604 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -2173,7 +2173,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 int memory_failure(unsigned long pfn, int flags)
 {
 	struct page *p;
-	struct page *hpage;
+	struct folio *folio;
 	struct dev_pagemap *pgmap;
 	int res = 0;
 	unsigned long page_flags;
@@ -2261,8 +2261,8 @@ int memory_failure(unsigned long pfn, int flags)
 		}
 	}
 
-	hpage = compound_head(p);
-	if (PageTransHuge(hpage)) {
+	folio = page_folio(p);
+	if (folio_test_large(folio)) {
 		/*
 		 * The flag must be set after the refcount is bumped
 		 * otherwise it may race with THP split.
@@ -2276,12 +2276,13 @@ int memory_failure(unsigned long pfn, int flags)
 		 * or unhandlable page.  The refcount is bumped iff the
 		 * page is a valid handlable page.
 		 */
-		SetPageHasHWPoisoned(hpage);
+		folio_set_has_hwpoisoned(folio);
 		if (try_to_split_thp_page(p) < 0) {
 			res = action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
 			goto unlock_mutex;
 		}
 		VM_BUG_ON_PAGE(!page_count(p), p);
+		folio = page_folio(p);
 	}
 
 	/*
@@ -2292,9 +2293,9 @@ int memory_failure(unsigned long pfn, int flags)
 	 * The check (unnecessarily) ignores LRU pages being isolated and
 	 * walked by the page reclaim code, however that's not a big loss.
 	 */
-	shake_page(p);
+	shake_folio(folio);
 
-	lock_page(p);
+	folio_lock(folio);
 
 	/*
 	 * We're only intended to deal with the non-Compound page here.
@@ -2302,11 +2303,11 @@ int memory_failure(unsigned long pfn, int flags)
 	 * race window. If this happens, we could try again to hopefully
 	 * handle the page next round.
 	 */
-	if (PageCompound(p)) {
+	if (folio_test_large(folio)) {
 		if (retry) {
 			ClearPageHWPoison(p);
-			unlock_page(p);
-			put_page(p);
+			folio_unlock(folio);
+			folio_put(folio);
 			flags &= ~MF_COUNT_INCREASED;
 			retry = false;
 			goto try_again;
@@ -2322,29 +2323,29 @@ int memory_failure(unsigned long pfn, int flags)
 	 * folio_remove_rmap_*() in try_to_unmap_one(). So to determine page
 	 * status correctly, we save a copy of the page flags at this time.
 	 */
-	page_flags = p->flags;
+	page_flags = folio->flags;
 
 	if (hwpoison_filter(p)) {
 		ClearPageHWPoison(p);
-		unlock_page(p);
-		put_page(p);
+		folio_unlock(folio);
+		folio_put(folio);
 		res = -EOPNOTSUPP;
 		goto unlock_mutex;
 	}
 
 	/*
-	 * __munlock_folio() may clear a writeback page's LRU flag without
-	 * page_lock. We need wait writeback completion for this page or it
-	 * may trigger vfs BUG while evict inode.
+	 * __munlock_folio() may clear a writeback folio's LRU flag without
+	 * the folio lock. We need to wait for writeback completion for this
+	 * folio or it may trigger a vfs BUG while evicting inode.
 	 */
-	if (!PageLRU(p) && !PageWriteback(p))
+	if (!folio_test_lru(folio) && !folio_test_writeback(folio))
 		goto identify_page_state;
 
 	/*
 	 * It's very difficult to mess with pages currently under IO
 	 * and in many cases impossible, so we just avoid it here.
 	 */
-	wait_on_page_writeback(p);
+	folio_wait_writeback(folio);
 
 	/*
 	 * Now take care of user space mappings.
@@ -2358,7 +2359,8 @@ int memory_failure(unsigned long pfn, int flags)
 	/*
 	 * Torn down by someone else?
 	 */
-	if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
+	if (folio_test_lru(folio) && !folio_test_swapcache(folio) &&
+	    folio->mapping == NULL) {
 		res = action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
 		goto unlock_page;
 	}
@@ -2368,7 +2370,7 @@ int memory_failure(unsigned long pfn, int flags)
 	mutex_unlock(&mf_mutex);
 	return res;
 unlock_page:
-	unlock_page(p);
+	folio_unlock(folio);
 unlock_mutex:
 	mutex_unlock(&mf_mutex);
 	return res;
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v2 08/11] mm/memory-failure: Convert hwpoison_user_mappings to take a folio
  2024-04-08 19:42 [PATCH v2 00/11] Some cleanups for memory-failure Matthew Wilcox (Oracle)
                   ` (6 preceding siblings ...)
  2024-04-08 19:42 ` [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio Matthew Wilcox (Oracle)
@ 2024-04-08 19:42 ` Matthew Wilcox (Oracle)
  2024-04-09  6:15   ` Jane Chu
  2024-04-08 19:42 ` [PATCH v2 09/11] mm/memory-failure: Add some folio conversions to unpoison_memory Matthew Wilcox (Oracle)
                   ` (3 subsequent siblings)
  11 siblings, 1 reply; 48+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-04-08 19:42 UTC (permalink / raw)
  To: Miaohe Lin; +Cc: Matthew Wilcox (Oracle), linux-mm

Pass the folio from the callers, and use it throughout instead of hpage.
Saves dozens of calls to compound_head().

Acked-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/memory-failure.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 1c7c73776604..fae0b42f0aaf 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1543,24 +1543,24 @@ static int get_hwpoison_page(struct page *p, unsigned long flags)
  * Do all that is necessary to remove user space mappings. Unmap
  * the pages and send SIGBUS to the processes if the data was dirty.
  */
-static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
-				  int flags, struct page *hpage)
+static bool hwpoison_user_mappings(struct folio *folio, struct page *p,
+		unsigned long pfn, int flags)
 {
-	struct folio *folio = page_folio(hpage);
 	enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON;
 	struct address_space *mapping;
 	LIST_HEAD(tokill);
 	bool unmap_success;
 	int forcekill;
-	bool mlocked = PageMlocked(hpage);
+	bool mlocked = folio_test_mlocked(folio);
 
 	/*
 	 * Here we are interested only in user-mapped pages, so skip any
 	 * other types of pages.
 	 */
-	if (PageReserved(p) || PageSlab(p) || PageTable(p) || PageOffline(p))
+	if (folio_test_reserved(folio) || folio_test_slab(folio) ||
+	    folio_test_pgtable(folio) || folio_test_offline(folio))
 		return true;
-	if (!(PageLRU(hpage) || PageHuge(p)))
+	if (!(folio_test_lru(folio) || folio_test_hugetlb(folio)))
 		return true;
 
 	/*
@@ -1570,7 +1570,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	if (!page_mapped(p))
 		return true;
 
-	if (PageSwapCache(p)) {
+	if (folio_test_swapcache(folio)) {
 		pr_err("%#lx: keeping poisoned page in swap cache\n", pfn);
 		ttu &= ~TTU_HWPOISON;
 	}
@@ -1581,11 +1581,11 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * XXX: the dirty test could be racy: set_page_dirty() may not always
 	 * be called inside page lock (it's recommended but not enforced).
 	 */
-	mapping = page_mapping(hpage);
-	if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping &&
+	mapping = folio_mapping(folio);
+	if (!(flags & MF_MUST_KILL) && !folio_test_dirty(folio) && mapping &&
 	    mapping_can_writeback(mapping)) {
-		if (page_mkclean(hpage)) {
-			SetPageDirty(hpage);
+		if (folio_mkclean(folio)) {
+			folio_set_dirty(folio);
 		} else {
 			ttu &= ~TTU_HWPOISON;
 			pr_info("%#lx: corrupted page was clean: dropped without side effects\n",
@@ -1600,7 +1600,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 */
 	collect_procs(folio, p, &tokill, flags & MF_ACTION_REQUIRED);
 
-	if (PageHuge(hpage) && !PageAnon(hpage)) {
+	if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {
 		/*
 		 * For hugetlb pages in shared mappings, try_to_unmap
 		 * could potentially call huge_pmd_unshare.  Because of
@@ -1640,7 +1640,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * use a more force-full uncatchable kill to prevent
 	 * any accesses to the poisoned memory.
 	 */
-	forcekill = PageDirty(hpage) || (flags & MF_MUST_KILL) ||
+	forcekill = folio_test_dirty(folio) || (flags & MF_MUST_KILL) ||
 		    !unmap_success;
 	kill_procs(&tokill, forcekill, !unmap_success, pfn, flags);
 
@@ -2084,7 +2084,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
 
 	page_flags = folio->flags;
 
-	if (!hwpoison_user_mappings(p, pfn, flags, &folio->page)) {
+	if (!hwpoison_user_mappings(folio, p, pfn, flags)) {
 		folio_unlock(folio);
 		return action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
 	}
@@ -2351,7 +2351,7 @@ int memory_failure(unsigned long pfn, int flags)
 	 * Now take care of user space mappings.
 	 * Abort on fail: __filemap_remove_folio() assumes unmapped page.
 	 */
-	if (!hwpoison_user_mappings(p, pfn, flags, p)) {
+	if (!hwpoison_user_mappings(folio, p, pfn, flags)) {
 		res = action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
 		goto unlock_page;
 	}
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v2 09/11] mm/memory-failure: Add some folio conversions to unpoison_memory
  2024-04-08 19:42 [PATCH v2 00/11] Some cleanups for memory-failure Matthew Wilcox (Oracle)
                   ` (7 preceding siblings ...)
  2024-04-08 19:42 ` [PATCH v2 08/11] mm/memory-failure: Convert hwpoison_user_mappings to take " Matthew Wilcox (Oracle)
@ 2024-04-08 19:42 ` Matthew Wilcox (Oracle)
  2024-04-09  6:17   ` Jane Chu
  2024-04-08 19:42 ` [PATCH v2 10/11] mm/memory-failure: Use folio functions throughout collect_procs() Matthew Wilcox (Oracle)
                   ` (2 subsequent siblings)
  11 siblings, 1 reply; 48+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-04-08 19:42 UTC (permalink / raw)
  To: Miaohe Lin; +Cc: Matthew Wilcox (Oracle), linux-mm

Some of these folio APIs didn't exist when the unpoison_memory()
conversion was done originally.

Acked-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/memory-failure.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index fae0b42f0aaf..7caec2fc3327 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -2540,8 +2540,8 @@ int unpoison_memory(unsigned long pfn)
 		goto unlock_mutex;
 	}
 
-	if (folio_test_slab(folio) || PageTable(&folio->page) ||
-	    folio_test_reserved(folio) || PageOffline(&folio->page))
+	if (folio_test_slab(folio) || folio_test_pgtable(folio) ||
+	    folio_test_reserved(folio) || folio_test_offline(folio))
 		goto unlock_mutex;
 
 	/*
@@ -2562,7 +2562,7 @@ int unpoison_memory(unsigned long pfn)
 
 	ghp = get_hwpoison_page(p, MF_UNPOISON);
 	if (!ghp) {
-		if (PageHuge(p)) {
+		if (folio_test_hugetlb(folio)) {
 			huge = true;
 			count = folio_free_raw_hwp(folio, false);
 			if (count == 0)
@@ -2578,7 +2578,7 @@ int unpoison_memory(unsigned long pfn)
 					 pfn, &unpoison_rs);
 		}
 	} else {
-		if (PageHuge(p)) {
+		if (folio_test_hugetlb(folio)) {
 			huge = true;
 			count = folio_free_raw_hwp(folio, false);
 			if (count == 0) {
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v2 10/11] mm/memory-failure: Use folio functions throughout collect_procs()
  2024-04-08 19:42 [PATCH v2 00/11] Some cleanups for memory-failure Matthew Wilcox (Oracle)
                   ` (8 preceding siblings ...)
  2024-04-08 19:42 ` [PATCH v2 09/11] mm/memory-failure: Add some folio conversions to unpoison_memory Matthew Wilcox (Oracle)
@ 2024-04-08 19:42 ` Matthew Wilcox (Oracle)
  2024-04-09  6:19   ` Jane Chu
  2024-04-08 19:42 ` [PATCH v2 11/11] mm/memory-failure: Pass the folio to collect_procs_ksm() Matthew Wilcox (Oracle)
  2024-04-09  6:28 ` [PATCH v2 00/11] Some cleanups for memory-failure Jane Chu
  11 siblings, 1 reply; 48+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-04-08 19:42 UTC (permalink / raw)
  To: Miaohe Lin; +Cc: Matthew Wilcox (Oracle), linux-mm

Saves a couple of calls to compound_head().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/memory-failure.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 7caec2fc3327..1c3cf1fe4964 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -712,9 +712,9 @@ static void collect_procs(struct folio *folio, struct page *page,
 {
 	if (!folio->mapping)
 		return;
-	if (unlikely(PageKsm(page)))
+	if (unlikely(folio_test_ksm(folio)))
 		collect_procs_ksm(page, tokill, force_early);
-	else if (PageAnon(page))
+	else if (folio_test_anon(folio))
 		collect_procs_anon(folio, page, tokill, force_early);
 	else
 		collect_procs_file(folio, page, tokill, force_early);
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v2 11/11] mm/memory-failure: Pass the folio to collect_procs_ksm()
  2024-04-08 19:42 [PATCH v2 00/11] Some cleanups for memory-failure Matthew Wilcox (Oracle)
                   ` (9 preceding siblings ...)
  2024-04-08 19:42 ` [PATCH v2 10/11] mm/memory-failure: Use folio functions throughout collect_procs() Matthew Wilcox (Oracle)
@ 2024-04-08 19:42 ` Matthew Wilcox (Oracle)
  2024-04-09  6:27   ` Jane Chu
  2024-04-09  6:28 ` [PATCH v2 00/11] Some cleanups for memory-failure Jane Chu
  11 siblings, 1 reply; 48+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-04-08 19:42 UTC (permalink / raw)
  To: Miaohe Lin; +Cc: Matthew Wilcox (Oracle), linux-mm

We've already calculated it, so pass it in instead of recalculating it
in collect_procs_ksm().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/ksm.h | 14 +++-----------
 mm/ksm.c            |  5 ++---
 mm/memory-failure.c |  2 +-
 3 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 358803cfd4d5..52c63a9c5a9c 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -81,15 +81,9 @@ struct folio *ksm_might_need_to_copy(struct folio *folio,
 
 void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc);
 void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
-
-#ifdef CONFIG_MEMORY_FAILURE
-void collect_procs_ksm(struct page *page, struct list_head *to_kill,
-		       int force_early);
-#endif
-
-#ifdef CONFIG_PROC_FS
+void collect_procs_ksm(struct folio *folio, struct page *page,
+		struct list_head *to_kill, int force_early);
 long ksm_process_profit(struct mm_struct *);
-#endif /* CONFIG_PROC_FS */
 
 #else  /* !CONFIG_KSM */
 
@@ -120,12 +114,10 @@ static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte)
 {
 }
 
-#ifdef CONFIG_MEMORY_FAILURE
-static inline void collect_procs_ksm(struct page *page,
+static inline void collect_procs_ksm(struct folio *folio, struct page *page,
 				     struct list_head *to_kill, int force_early)
 {
 }
-#endif
 
 #ifdef CONFIG_MMU
 static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
diff --git a/mm/ksm.c b/mm/ksm.c
index 8c001819cf10..7d032b52002f 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -3172,12 +3172,11 @@ void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc)
 /*
  * Collect processes when the error hit an ksm page.
  */
-void collect_procs_ksm(struct page *page, struct list_head *to_kill,
-		       int force_early)
+void collect_procs_ksm(struct folio *folio, struct page *page,
+		struct list_head *to_kill, int force_early)
 {
 	struct ksm_stable_node *stable_node;
 	struct ksm_rmap_item *rmap_item;
-	struct folio *folio = page_folio(page);
 	struct vm_area_struct *vma;
 	struct task_struct *tsk;
 
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 1c3cf1fe4964..0630e34b583b 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -713,7 +713,7 @@ static void collect_procs(struct folio *folio, struct page *page,
 	if (!folio->mapping)
 		return;
 	if (unlikely(folio_test_ksm(folio)))
-		collect_procs_ksm(page, tokill, force_early);
+		collect_procs_ksm(folio, page, tokill, force_early);
 	else if (folio_test_anon(folio))
 		collect_procs_anon(folio, page, tokill, force_early);
 	else
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 02/11] mm/memory-failure: Pass addr to __add_to_kill()
  2024-04-08 19:42 ` [PATCH v2 02/11] mm/memory-failure: Pass addr to __add_to_kill() Matthew Wilcox (Oracle)
@ 2024-04-08 22:32   ` Jane Chu
  2024-04-10  9:11   ` Oscar Salvador
  1 sibling, 0 replies; 48+ messages in thread
From: Jane Chu @ 2024-04-08 22:32 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), Miaohe Lin; +Cc: linux-mm, Longlong Xia

On 4/8/2024 12:42 PM, Matthew Wilcox (Oracle) wrote:

> Handle anon/file folios the same way as KSM & DAX folios by passing in
> the address.
>
> Cc: Longlong Xia <xialonglong1@huawei.com>
> Acked-by: Miaohe Lin <linmiaohe@huawei.com>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   mm/memory-failure.c | 6 ++++--
>   1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 34b3c8255c9b..f94d85075ec1 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -431,7 +431,7 @@ static void __add_to_kill(struct task_struct *tsk, struct page *p,
>   		return;
>   	}
>   
> -	tk->addr = addr ? addr : page_address_in_vma(p, vma);
> +	tk->addr = addr;
>   	if (is_zone_device_page(p))
>   		tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
>   	else
> @@ -464,7 +464,8 @@ static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
>   				  struct vm_area_struct *vma,
>   				  struct list_head *to_kill)
>   {
> -	__add_to_kill(tsk, p, vma, to_kill, 0);
> +	unsigned long addr = page_address_in_vma(p, vma);
> +	__add_to_kill(tsk, p, vma, to_kill, addr);
>   }
>   
>   #ifdef CONFIG_KSM
> @@ -480,6 +481,7 @@ static bool task_in_to_kill_list(struct list_head *to_kill,
>   
>   	return false;
>   }
> +
>   void add_to_kill_ksm(struct task_struct *tsk, struct page *p,
>   		     struct vm_area_struct *vma, struct list_head *to_kill,
>   		     unsigned long addr)

Looks good.

Reviewed-by: Jane Chu <jane.chu@oracle.com>

-jane



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 03/11] mm: Return the address from page_mapped_in_vma()
  2024-04-08 19:42 ` [PATCH v2 03/11] mm: Return the address from page_mapped_in_vma() Matthew Wilcox (Oracle)
@ 2024-04-08 22:38   ` Jane Chu
  2024-04-10  9:38   ` Oscar Salvador
  1 sibling, 0 replies; 48+ messages in thread
From: Jane Chu @ 2024-04-08 22:38 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), Miaohe Lin; +Cc: linux-mm

On 4/8/2024 12:42 PM, Matthew Wilcox (Oracle) wrote:

> The only user of this function calls page_address_in_vma() immediately
> after page_mapped_in_vma() calculates it and uses it to return true/false.
> Return the address instead, allowing memory-failure to skip the call
> to page_address_in_vma().
>
> Acked-by: Miaohe Lin <linmiaohe@huawei.com>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   mm/page_vma_mapped.c | 14 ++++++++------
>   1 file changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> index 53b8868ede61..48bfc17934cd 100644
> --- a/mm/page_vma_mapped.c
> +++ b/mm/page_vma_mapped.c
> @@ -319,9 +319,10 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>    * @page: the page to test
>    * @vma: the VMA to test
>    *
> - * Returns 1 if the page is mapped into the page tables of the VMA, 0
> - * if the page is not mapped into the page tables of this VMA.  Only
> - * valid for normal file or anonymous VMAs.
> + * Return: The address the page is mapped at if the page is in the range
> + * covered by the VMA and present in the page table.  If the page is
> + * outside the VMA or not present, returns -EFAULT.
> + * Only valid for normal file or anonymous VMAs.
>    */
>   int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
>   {
> @@ -336,9 +337,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
>   
>   	pvmw.address = vma_address(vma, pgoff, 1);
>   	if (pvmw.address == -EFAULT)
> -		return 0;
> +	       goto out;
>   	if (!page_vma_mapped_walk(&pvmw))
> -		return 0;
> +		return -EFAULT;
>   	page_vma_mapped_walk_done(&pvmw);
> -	return 1;
> +out:
> +	return pvmw.address;
>   }

Looks good.

Reviewed-by: Jane Chu <jane.chu@oracle.com>

-jane



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 04/11] mm: Make page_mapped_in_vma conditional on CONFIG_MEMORY_FAILURE
  2024-04-08 19:42 ` [PATCH v2 04/11] mm: Make page_mapped_in_vma conditional on CONFIG_MEMORY_FAILURE Matthew Wilcox (Oracle)
@ 2024-04-08 22:45   ` Jane Chu
  2024-04-08 22:52     ` Matthew Wilcox
  2024-04-10  9:39   ` Oscar Salvador
  1 sibling, 1 reply; 48+ messages in thread
From: Jane Chu @ 2024-04-08 22:45 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), Miaohe Lin; +Cc: linux-mm

On 4/8/2024 12:42 PM, Matthew Wilcox (Oracle) wrote:

> This function is only currently used by the memory-failure code, so
> we can omit it if we're not compiling in the memory-failure code.
>
> Suggested-by: Miaohe Lin <linmiaohe@huawei.com>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   mm/page_vma_mapped.c | 2 ++
>   1 file changed, 2 insertions(+)
>
> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> index 48bfc17934cd..6e06267fc220 100644
> --- a/mm/page_vma_mapped.c
> +++ b/mm/page_vma_mapped.c
> @@ -314,6 +314,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>   	return false;
>   }
>   
> +#ifdef CONFIG_MEMORY_FAILURE
>   /**
>    * page_mapped_in_vma - check whether a page is really mapped in a VMA
>    * @page: the page to test
> @@ -344,3 +345,4 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
>   out:
>   	return pvmw.address;
>   }
> +#endif

Should the declaration below be put in the CONFIG_MEMORY_FAILURE bracket as well?

include/linux/rmap.h: int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);

with that,

Reviewed-by: Jane Chu <jane.chu@oracle.com>

thanks,

-jane




^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 04/11] mm: Make page_mapped_in_vma conditional on CONFIG_MEMORY_FAILURE
  2024-04-08 22:45   ` Jane Chu
@ 2024-04-08 22:52     ` Matthew Wilcox
  2024-04-09  6:35       ` Jane Chu
  0 siblings, 1 reply; 48+ messages in thread
From: Matthew Wilcox @ 2024-04-08 22:52 UTC (permalink / raw)
  To: Jane Chu; +Cc: Miaohe Lin, linux-mm

On Mon, Apr 08, 2024 at 03:45:37PM -0700, Jane Chu wrote:
> On 4/8/2024 12:42 PM, Matthew Wilcox (Oracle) wrote:
> > +#ifdef CONFIG_MEMORY_FAILURE
> >   /**
> >    * page_mapped_in_vma - check whether a page is really mapped in a VMA
> >    * @page: the page to test
> > @@ -344,3 +345,4 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
> >   out:
> >   	return pvmw.address;
> >   }
> > +#endif
> 
> Should the declaration below be put in the CONFIG_MEMORY_FAILURE bracket as well?
> 
> include/linux/rmap.h: int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);

I don't see why we should; I see no advantage to adding that ifdef.
What would motivate you to add it?

> with that,
> 
> Reviewed-by: Jane Chu <jane.chu@oracle.com>

Thanks!


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 05/11] mm/memory-failure: Convert shake_page() to shake_folio()
  2024-04-08 19:42 ` [PATCH v2 05/11] mm/memory-failure: Convert shake_page() to shake_folio() Matthew Wilcox (Oracle)
@ 2024-04-08 22:53   ` Jane Chu
  0 siblings, 0 replies; 48+ messages in thread
From: Jane Chu @ 2024-04-08 22:53 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), Miaohe Lin; +Cc: linux-mm

On 4/8/2024 12:42 PM, Matthew Wilcox (Oracle) wrote:

> Removes two calls to compound_head().  Move the prototype to
> internal.h; we definitely don't want code outside mm using it.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   include/linux/mm.h   |  1 -
>   mm/hwpoison-inject.c | 11 ++++++-----
>   mm/internal.h        |  1 +
>   mm/memory-failure.c  | 15 ++++++++++-----
>   4 files changed, 17 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index b9173e230804..19a67f45907b 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -4011,7 +4011,6 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
>   extern int memory_failure(unsigned long pfn, int flags);
>   extern void memory_failure_queue_kick(int cpu);
>   extern int unpoison_memory(unsigned long pfn);
> -extern void shake_page(struct page *p);
>   extern atomic_long_t num_poisoned_pages __read_mostly;
>   extern int soft_offline_page(unsigned long pfn, int flags);
>   #ifdef CONFIG_MEMORY_FAILURE
> diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
> index d0548e382b6b..c9d653f51e45 100644
> --- a/mm/hwpoison-inject.c
> +++ b/mm/hwpoison-inject.c
> @@ -15,7 +15,7 @@ static int hwpoison_inject(void *data, u64 val)
>   {
>   	unsigned long pfn = val;
>   	struct page *p;
> -	struct page *hpage;
> +	struct folio *folio;
>   	int err;
>   
>   	if (!capable(CAP_SYS_ADMIN))
> @@ -25,16 +25,17 @@ static int hwpoison_inject(void *data, u64 val)
>   		return -ENXIO;
>   
>   	p = pfn_to_page(pfn);
> -	hpage = compound_head(p);
> +	folio = page_folio(p);
>   
>   	if (!hwpoison_filter_enable)
>   		goto inject;
>   
> -	shake_page(hpage);
> +	shake_folio(folio);
>   	/*
>   	 * This implies unable to support non-LRU pages except free page.
>   	 */
> -	if (!PageLRU(hpage) && !PageHuge(p) && !is_free_buddy_page(p))
> +	if (!folio_test_lru(folio) && !folio_test_hugetlb(folio) &&
> +	    !is_free_buddy_page(p))
>   		return 0;
>   
>   	/*
> @@ -42,7 +43,7 @@ static int hwpoison_inject(void *data, u64 val)
>   	 * the targeted owner (or on a free page).
>   	 * memory_failure() will redo the check reliably inside page lock.
>   	 */
> -	err = hwpoison_filter(hpage);
> +	err = hwpoison_filter(&folio->page);
>   	if (err)
>   		return 0;
>   
> diff --git a/mm/internal.h b/mm/internal.h
> index 57c1055d5568..14cdc9ccb582 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1010,6 +1010,7 @@ static inline int find_next_best_node(int node, nodemask_t *used_node_mask)
>   /*
>    * mm/memory-failure.c
>    */
> +void shake_folio(struct folio *folio);
>   extern int hwpoison_filter(struct page *p);
>   
>   extern u32 hwpoison_filter_dev_major;
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index f94d85075ec1..2e64e132bba1 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -357,20 +357,25 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
>    * Unknown page type encountered. Try to check whether it can turn PageLRU by
>    * lru_add_drain_all.
>    */
> -void shake_page(struct page *p)
> +void shake_folio(struct folio *folio)
>   {
> -	if (PageHuge(p))
> +	if (folio_test_hugetlb(folio))
>   		return;
>   	/*
>   	 * TODO: Could shrink slab caches here if a lightweight range-based
>   	 * shrinker will be available.
>   	 */
> -	if (PageSlab(p))
> +	if (folio_test_slab(folio))
>   		return;
>   
>   	lru_add_drain_all();
>   }
> -EXPORT_SYMBOL_GPL(shake_page);
> +EXPORT_SYMBOL_GPL(shake_folio);
> +
> +static void shake_page(struct page *page)
> +{
> +	shake_folio(page_folio(page));
> +}
>   
>   static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
>   		unsigned long address)
> @@ -1623,7 +1628,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
>   	 * shake_page() again to ensure that it's flushed.
>   	 */
>   	if (mlocked)
> -		shake_page(hpage);
> +		shake_folio(folio);
>   
>   	/*
>   	 * Now that the dirty bit has been propagated to the

Looks good.  Reviewed-by: Jane Chu <jane.chu@oracle.com>

-jane




^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 06/11] mm: Convert hugetlb_page_mapping_lock_write to folio
  2024-04-08 19:42 ` [PATCH v2 06/11] mm: Convert hugetlb_page_mapping_lock_write to folio Matthew Wilcox (Oracle)
@ 2024-04-08 23:09   ` Jane Chu
  2024-04-10  9:52   ` Oscar Salvador
  1 sibling, 0 replies; 48+ messages in thread
From: Jane Chu @ 2024-04-08 23:09 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), Miaohe Lin; +Cc: linux-mm

On 4/8/2024 12:42 PM, Matthew Wilcox (Oracle) wrote:

> The page is only used to get the mapping, so the folio will do just
> as well.  Both callers already have a folio available, so this saves
> a call to compound_head().
>
> Acked-by: Miaohe Lin <linmiaohe@huawei.com>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   include/linux/hugetlb.h | 6 +++---
>   mm/hugetlb.c            | 6 +++---
>   mm/memory-failure.c     | 2 +-
>   mm/migrate.c            | 2 +-
>   4 files changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 3f3e62880279..bebf4c3a53ef 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -178,7 +178,7 @@ bool hugetlbfs_pagecache_present(struct hstate *h,
>   				 struct vm_area_struct *vma,
>   				 unsigned long address);
>   
> -struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage);
> +struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio);
>   
>   extern int sysctl_hugetlb_shm_group;
>   extern struct list_head huge_boot_pages[MAX_NUMNODES];
> @@ -297,8 +297,8 @@ static inline unsigned long hugetlb_total_pages(void)
>   	return 0;
>   }
>   
> -static inline struct address_space *hugetlb_page_mapping_lock_write(
> -							struct page *hpage)
> +static inline struct address_space *hugetlb_folio_mapping_lock_write(
> +							struct folio *folio)
>   {
>   	return NULL;
>   }
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 456c81fbf8f5..707c85303e88 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -2155,13 +2155,13 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
>   /*
>    * Find and lock address space (mapping) in write mode.
>    *
> - * Upon entry, the page is locked which means that page_mapping() is
> + * Upon entry, the folio is locked which means that folio_mapping() is
>    * stable.  Due to locking order, we can only trylock_write.  If we can
>    * not get the lock, simply return NULL to caller.
>    */
> -struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage)
> +struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio)
>   {
> -	struct address_space *mapping = page_mapping(hpage);
> +	struct address_space *mapping = folio_mapping(folio);
>   
>   	if (!mapping)
>   		return mapping;
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 2e64e132bba1..0a45fb7fb055 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1608,7 +1608,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
>   		 * TTU_RMAP_LOCKED to indicate we have taken the lock
>   		 * at this higher level.
>   		 */
> -		mapping = hugetlb_page_mapping_lock_write(hpage);
> +		mapping = hugetlb_folio_mapping_lock_write(folio);
>   		if (mapping) {
>   			try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
>   			i_mmap_unlock_write(mapping);
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 285072bca29c..f8da9b89e043 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -1425,7 +1425,7 @@ static int unmap_and_move_huge_page(new_folio_t get_new_folio,
>   			 * semaphore in write mode here and set TTU_RMAP_LOCKED
>   			 * to let lower levels know we have taken the lock.
>   			 */
> -			mapping = hugetlb_page_mapping_lock_write(&src->page);
> +			mapping = hugetlb_folio_mapping_lock_write(src);
>   			if (unlikely(!mapping))
>   				goto unlock_put_anon;
>   

Looks good.

Reviewed-by: Jane Chu  <jane.chu@oracle.com>

-jane



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio
  2024-04-08 19:42 ` [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio Matthew Wilcox (Oracle)
@ 2024-04-09  0:34   ` Jane Chu
  2024-04-10 10:21     ` Oscar Salvador
  0 siblings, 1 reply; 48+ messages in thread
From: Jane Chu @ 2024-04-09  0:34 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), Miaohe Lin; +Cc: linux-mm

On 4/8/2024 12:42 PM, Matthew Wilcox (Oracle) wrote:

> Saves dozens of calls to compound_head().
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   mm/memory-failure.c | 40 +++++++++++++++++++++-------------------
>   1 file changed, 21 insertions(+), 19 deletions(-)
>
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 0a45fb7fb055..1c7c73776604 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -2173,7 +2173,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
>   int memory_failure(unsigned long pfn, int flags)
>   {
>   	struct page *p;
> -	struct page *hpage;
> +	struct folio *folio;
>   	struct dev_pagemap *pgmap;
>   	int res = 0;
>   	unsigned long page_flags;
> @@ -2261,8 +2261,8 @@ int memory_failure(unsigned long pfn, int flags)
>   		}
>   	}
>   
> -	hpage = compound_head(p);
> -	if (PageTransHuge(hpage)) {
> +	folio = page_folio(p);
> +	if (folio_test_large(folio)) {
>   		/*
>   		 * The flag must be set after the refcount is bumped
>   		 * otherwise it may race with THP split.
> @@ -2276,12 +2276,13 @@ int memory_failure(unsigned long pfn, int flags)
>   		 * or unhandlable page.  The refcount is bumped iff the
>   		 * page is a valid handlable page.
>   		 */
> -		SetPageHasHWPoisoned(hpage);
> +		folio_set_has_hwpoisoned(folio);
>   		if (try_to_split_thp_page(p) < 0) {
>   			res = action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
>   			goto unlock_mutex;
>   		}
>   		VM_BUG_ON_PAGE(!page_count(p), p);
> +		folio = page_folio(p);
>   	}
>   
>   	/*
> @@ -2292,9 +2293,9 @@ int memory_failure(unsigned long pfn, int flags)
>   	 * The check (unnecessarily) ignores LRU pages being isolated and
>   	 * walked by the page reclaim code, however that's not a big loss.
>   	 */
> -	shake_page(p);
> +	shake_folio(folio);
>   
> -	lock_page(p);
> +	folio_lock(folio);
>   
>   	/*
>   	 * We're only intended to deal with the non-Compound page here.
> @@ -2302,11 +2303,11 @@ int memory_failure(unsigned long pfn, int flags)
>   	 * race window. If this happens, we could try again to hopefully
>   	 * handle the page next round.
>   	 */
> -	if (PageCompound(p)) {
> +	if (folio_test_large(folio)) {

At this stage, 'p' is not expected to be a large page since a _refcount
has been taken, right?  So is it reasonable to replace the "if (retry)"
block with a VM_BUG_ON warning?  Otherwise, if 'p' became part of a
different large folio, how do we recover from here?

>   		if (retry) {
>   			ClearPageHWPoison(p);
> -			unlock_page(p);
> -			put_page(p);
> +			folio_unlock(folio);
> +			folio_put(folio);
>   			flags &= ~MF_COUNT_INCREASED;
>   			retry = false;
>   			goto try_again;
> @@ -2322,29 +2323,29 @@ int memory_failure(unsigned long pfn, int flags)
>   	 * folio_remove_rmap_*() in try_to_unmap_one(). So to determine page
>   	 * status correctly, we save a copy of the page flags at this time.
>   	 */
> -	page_flags = p->flags;
> +	page_flags = folio->flags;
>   
>   	if (hwpoison_filter(p)) {
>   		ClearPageHWPoison(p);
> -		unlock_page(p);
> -		put_page(p);
> +		folio_unlock(folio);
> +		folio_put(folio);
>   		res = -EOPNOTSUPP;
>   		goto unlock_mutex;
>   	}
>   
>   	/*
> -	 * __munlock_folio() may clear a writeback page's LRU flag without
> -	 * page_lock. We need wait writeback completion for this page or it
> -	 * may trigger vfs BUG while evict inode.
> +	 * __munlock_folio() may clear a writeback folio's LRU flag without
> +	 * the folio lock. We need to wait for writeback completion for this
> +	 * folio or it may trigger a vfs BUG while evicting inode.
>   	 */
> -	if (!PageLRU(p) && !PageWriteback(p))
> +	if (!folio_test_lru(folio) && !folio_test_writeback(folio))
>   		goto identify_page_state;
>   
>   	/*
>   	 * It's very difficult to mess with pages currently under IO
>   	 * and in many cases impossible, so we just avoid it here.
>   	 */
> -	wait_on_page_writeback(p);
> +	folio_wait_writeback(folio);
>   
>   	/*
>   	 * Now take care of user space mappings.
> @@ -2358,7 +2359,8 @@ int memory_failure(unsigned long pfn, int flags)
>   	/*
>   	 * Torn down by someone else?
>   	 */
> -	if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
> +	if (folio_test_lru(folio) && !folio_test_swapcache(folio) &&
> +	    folio->mapping == NULL) {
>   		res = action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
>   		goto unlock_page;
>   	}
> @@ -2368,7 +2370,7 @@ int memory_failure(unsigned long pfn, int flags)
>   	mutex_unlock(&mf_mutex);
>   	return res;
>   unlock_page:
> -	unlock_page(p);
> +	folio_unlock(folio);
>   unlock_mutex:
>   	mutex_unlock(&mf_mutex);
>   	return res;

thanks,

-jane



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 08/11] mm/memory-failure: Convert hwpoison_user_mappings to take a folio
  2024-04-08 19:42 ` [PATCH v2 08/11] mm/memory-failure: Convert hwpoison_user_mappings to take " Matthew Wilcox (Oracle)
@ 2024-04-09  6:15   ` Jane Chu
  0 siblings, 0 replies; 48+ messages in thread
From: Jane Chu @ 2024-04-09  6:15 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), Miaohe Lin; +Cc: linux-mm

On 4/8/2024 12:42 PM, Matthew Wilcox (Oracle) wrote:

> Pass the folio from the callers, and use it throughout instead of hpage.
> Saves dozens of calls to compound_head().
>
> Acked-by: Miaohe Lin <linmiaohe@huawei.com>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   mm/memory-failure.c | 30 +++++++++++++++---------------
>   1 file changed, 15 insertions(+), 15 deletions(-)
>
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 1c7c73776604..fae0b42f0aaf 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1543,24 +1543,24 @@ static int get_hwpoison_page(struct page *p, unsigned long flags)
>    * Do all that is necessary to remove user space mappings. Unmap
>    * the pages and send SIGBUS to the processes if the data was dirty.
>    */
> -static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
> -				  int flags, struct page *hpage)
> +static bool hwpoison_user_mappings(struct folio *folio, struct page *p,
> +		unsigned long pfn, int flags)
>   {
> -	struct folio *folio = page_folio(hpage);
>   	enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON;
>   	struct address_space *mapping;
>   	LIST_HEAD(tokill);
>   	bool unmap_success;
>   	int forcekill;
> -	bool mlocked = PageMlocked(hpage);
> +	bool mlocked = folio_test_mlocked(folio);
>   
>   	/*
>   	 * Here we are interested only in user-mapped pages, so skip any
>   	 * other types of pages.
>   	 */
> -	if (PageReserved(p) || PageSlab(p) || PageTable(p) || PageOffline(p))
> +	if (folio_test_reserved(folio) || folio_test_slab(folio) ||
> +	    folio_test_pgtable(folio) || folio_test_offline(folio))
>   		return true;
> -	if (!(PageLRU(hpage) || PageHuge(p)))
> +	if (!(folio_test_lru(folio) || folio_test_hugetlb(folio)))
>   		return true;
>   
>   	/*
> @@ -1570,7 +1570,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
>   	if (!page_mapped(p))
>   		return true;
>   
> -	if (PageSwapCache(p)) {
> +	if (folio_test_swapcache(folio)) {
>   		pr_err("%#lx: keeping poisoned page in swap cache\n", pfn);
>   		ttu &= ~TTU_HWPOISON;
>   	}
> @@ -1581,11 +1581,11 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
>   	 * XXX: the dirty test could be racy: set_page_dirty() may not always
>   	 * be called inside page lock (it's recommended but not enforced).
>   	 */
> -	mapping = page_mapping(hpage);
> -	if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping &&
> +	mapping = folio_mapping(folio);
> +	if (!(flags & MF_MUST_KILL) && !folio_test_dirty(folio) && mapping &&
>   	    mapping_can_writeback(mapping)) {
> -		if (page_mkclean(hpage)) {
> -			SetPageDirty(hpage);
> +		if (folio_mkclean(folio)) {
> +			folio_set_dirty(folio);
>   		} else {
>   			ttu &= ~TTU_HWPOISON;
>   			pr_info("%#lx: corrupted page was clean: dropped without side effects\n",
> @@ -1600,7 +1600,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
>   	 */
>   	collect_procs(folio, p, &tokill, flags & MF_ACTION_REQUIRED);
>   
> -	if (PageHuge(hpage) && !PageAnon(hpage)) {
> +	if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {
>   		/*
>   		 * For hugetlb pages in shared mappings, try_to_unmap
>   		 * could potentially call huge_pmd_unshare.  Because of
> @@ -1640,7 +1640,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
>   	 * use a more force-full uncatchable kill to prevent
>   	 * any accesses to the poisoned memory.
>   	 */
> -	forcekill = PageDirty(hpage) || (flags & MF_MUST_KILL) ||
> +	forcekill = folio_test_dirty(folio) || (flags & MF_MUST_KILL) ||
>   		    !unmap_success;
>   	kill_procs(&tokill, forcekill, !unmap_success, pfn, flags);
>   
> @@ -2084,7 +2084,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
>   
>   	page_flags = folio->flags;
>   
> -	if (!hwpoison_user_mappings(p, pfn, flags, &folio->page)) {
> +	if (!hwpoison_user_mappings(folio, p, pfn, flags)) {
>   		folio_unlock(folio);
>   		return action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
>   	}
> @@ -2351,7 +2351,7 @@ int memory_failure(unsigned long pfn, int flags)
>   	 * Now take care of user space mappings.
>   	 * Abort on fail: __filemap_remove_folio() assumes unmapped page.
>   	 */
> -	if (!hwpoison_user_mappings(p, pfn, flags, p)) {
> +	if (!hwpoison_user_mappings(folio, p, pfn, flags)) {
>   		res = action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
>   		goto unlock_page;
>   	}

Looks good to me.

Reviewed-by: Jane Chu <jane.chu@oracle.com>

-jane



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 09/11] mm/memory-failure: Add some folio conversions to unpoison_memory
  2024-04-08 19:42 ` [PATCH v2 09/11] mm/memory-failure: Add some folio conversions to unpoison_memory Matthew Wilcox (Oracle)
@ 2024-04-09  6:17   ` Jane Chu
  0 siblings, 0 replies; 48+ messages in thread
From: Jane Chu @ 2024-04-09  6:17 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), Miaohe Lin; +Cc: linux-mm

On 4/8/2024 12:42 PM, Matthew Wilcox (Oracle) wrote:

> Some of these folio APIs didn't exist when the unpoison_memory()
> conversion was done originally.
>
> Acked-by: Miaohe Lin <linmiaohe@huawei.com>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   mm/memory-failure.c | 8 ++++----
>   1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index fae0b42f0aaf..7caec2fc3327 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -2540,8 +2540,8 @@ int unpoison_memory(unsigned long pfn)
>   		goto unlock_mutex;
>   	}
>   
> -	if (folio_test_slab(folio) || PageTable(&folio->page) ||
> -	    folio_test_reserved(folio) || PageOffline(&folio->page))
> +	if (folio_test_slab(folio) || folio_test_pgtable(folio) ||
> +	    folio_test_reserved(folio) || folio_test_offline(folio))
>   		goto unlock_mutex;
>   
>   	/*
> @@ -2562,7 +2562,7 @@ int unpoison_memory(unsigned long pfn)
>   
>   	ghp = get_hwpoison_page(p, MF_UNPOISON);
>   	if (!ghp) {
> -		if (PageHuge(p)) {
> +		if (folio_test_hugetlb(folio)) {
>   			huge = true;
>   			count = folio_free_raw_hwp(folio, false);
>   			if (count == 0)
> @@ -2578,7 +2578,7 @@ int unpoison_memory(unsigned long pfn)
>   					 pfn, &unpoison_rs);
>   		}
>   	} else {
> -		if (PageHuge(p)) {
> +		if (folio_test_hugetlb(folio)) {
>   			huge = true;
>   			count = folio_free_raw_hwp(folio, false);
>   			if (count == 0) {

Looks good.

Reviewed-by: Jane Chu <jane.chu@oracle.com>

-jane



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 10/11] mm/memory-failure: Use folio functions throughout collect_procs()
  2024-04-08 19:42 ` [PATCH v2 10/11] mm/memory-failure: Use folio functions throughout collect_procs() Matthew Wilcox (Oracle)
@ 2024-04-09  6:19   ` Jane Chu
  0 siblings, 0 replies; 48+ messages in thread
From: Jane Chu @ 2024-04-09  6:19 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), Miaohe Lin; +Cc: linux-mm

On 4/8/2024 12:42 PM, Matthew Wilcox (Oracle) wrote:

> Saves a couple of calls to compound_head().
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   mm/memory-failure.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 7caec2fc3327..1c3cf1fe4964 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -712,9 +712,9 @@ static void collect_procs(struct folio *folio, struct page *page,
>   {
>   	if (!folio->mapping)
>   		return;
> -	if (unlikely(PageKsm(page)))
> +	if (unlikely(folio_test_ksm(folio)))
>   		collect_procs_ksm(page, tokill, force_early);
> -	else if (PageAnon(page))
> +	else if (folio_test_anon(folio))
>   		collect_procs_anon(folio, page, tokill, force_early);
>   	else
>   		collect_procs_file(folio, page, tokill, force_early);

Looks good.

Reviewed-by: Jane Chu <jane.chu@oracle.com>

-jane



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 11/11] mm/memory-failure: Pass the folio to collect_procs_ksm()
  2024-04-08 19:42 ` [PATCH v2 11/11] mm/memory-failure: Pass the folio to collect_procs_ksm() Matthew Wilcox (Oracle)
@ 2024-04-09  6:27   ` Jane Chu
  2024-04-09 12:11     ` Matthew Wilcox
  0 siblings, 1 reply; 48+ messages in thread
From: Jane Chu @ 2024-04-09  6:27 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), Miaohe Lin; +Cc: linux-mm

On 4/8/2024 12:42 PM, Matthew Wilcox (Oracle) wrote:

> We've already calculated it, so pass it in instead of recalculating it
> in collect_procs_ksm().
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   include/linux/ksm.h | 14 +++-----------
>   mm/ksm.c            |  5 ++---
>   mm/memory-failure.c |  2 +-
>   3 files changed, 6 insertions(+), 15 deletions(-)
>
> diff --git a/include/linux/ksm.h b/include/linux/ksm.h
> index 358803cfd4d5..52c63a9c5a9c 100644
> --- a/include/linux/ksm.h
> +++ b/include/linux/ksm.h
> @@ -81,15 +81,9 @@ struct folio *ksm_might_need_to_copy(struct folio *folio,
>   
>   void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc);
>   void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
> -
> -#ifdef CONFIG_MEMORY_FAILURE
> -void collect_procs_ksm(struct page *page, struct list_head *to_kill,
> -		       int force_early);
> -#endif
> -
> -#ifdef CONFIG_PROC_FS
> +void collect_procs_ksm(struct folio *folio, struct page *page,
> +		struct list_head *to_kill, int force_early);
>   long ksm_process_profit(struct mm_struct *);
> -#endif /* CONFIG_PROC_FS */
Why is the #ifdef-#endif CONFIG_PROC_FS removed?  In ksm.c, 
ksm_process_profit() is defined within the config switch.
>   
>   #else  /* !CONFIG_KSM */
>   
> @@ -120,12 +114,10 @@ static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte)
>   {
>   }
>   
> -#ifdef CONFIG_MEMORY_FAILURE
> -static inline void collect_procs_ksm(struct page *page,
> +static inline void collect_procs_ksm(struct folio *folio, struct page *page,
>   				     struct list_head *to_kill, int force_early)
>   {
>   }
> -#endif
>   
>   #ifdef CONFIG_MMU
>   static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
> diff --git a/mm/ksm.c b/mm/ksm.c
> index 8c001819cf10..7d032b52002f 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -3172,12 +3172,11 @@ void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc)
>   /*
>    * Collect processes when the error hit an ksm page.
>    */
> -void collect_procs_ksm(struct page *page, struct list_head *to_kill,
> -		       int force_early)
> +void collect_procs_ksm(struct folio *folio, struct page *page,
> +		struct list_head *to_kill, int force_early)
>   {
>   	struct ksm_stable_node *stable_node;
>   	struct ksm_rmap_item *rmap_item;
> -	struct folio *folio = page_folio(page);
>   	struct vm_area_struct *vma;
>   	struct task_struct *tsk;
>   
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 1c3cf1fe4964..0630e34b583b 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -713,7 +713,7 @@ static void collect_procs(struct folio *folio, struct page *page,
>   	if (!folio->mapping)
>   		return;
>   	if (unlikely(folio_test_ksm(folio)))
> -		collect_procs_ksm(page, tokill, force_early);
> +		collect_procs_ksm(folio, page, tokill, force_early);
>   	else if (folio_test_anon(folio))
>   		collect_procs_anon(folio, page, tokill, force_early);
>   	else

thanks,

-jane



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 00/11] Some cleanups for memory-failure
  2024-04-08 19:42 [PATCH v2 00/11] Some cleanups for memory-failure Matthew Wilcox (Oracle)
                   ` (10 preceding siblings ...)
  2024-04-08 19:42 ` [PATCH v2 11/11] mm/memory-failure: Pass the folio to collect_procs_ksm() Matthew Wilcox (Oracle)
@ 2024-04-09  6:28 ` Jane Chu
  2024-04-09 12:12   ` Matthew Wilcox
  11 siblings, 1 reply; 48+ messages in thread
From: Jane Chu @ 2024-04-09  6:28 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), Miaohe Lin; +Cc: linux-mm

Hi, Matthew,

What is the series based on?

thanks,

-jane

On 4/8/2024 12:42 PM, Matthew Wilcox (Oracle) wrote:
> A lot of folio conversions, plus some other simplifications.
>
> Matthew Wilcox (Oracle) (11):
>    mm/memory-failure: Remove fsdax_pgoff argument from __add_to_kill
>    mm/memory-failure: Pass addr to __add_to_kill()
>    mm: Return the address from page_mapped_in_vma()
>    mm: Make page_mapped_in_vma conditional on CONFIG_MEMORY_FAILURE
>    mm/memory-failure: Convert shake_page() to shake_folio()
>    mm: Convert hugetlb_page_mapping_lock_write to folio
>    mm/memory-failure: Convert memory_failure() to use a folio
>    mm/memory-failure: Convert hwpoison_user_mappings to take a folio
>    mm/memory-failure: Add some folio conversions to unpoison_memory
>    mm/memory-failure: Use folio functions throughout collect_procs()
>    mm/memory-failure: Pass the folio to collect_procs_ksm()
>
>   include/linux/hugetlb.h |   6 +-
>   include/linux/ksm.h     |  14 +----
>   include/linux/mm.h      |   1 -
>   mm/hugetlb.c            |   6 +-
>   mm/hwpoison-inject.c    |  11 ++--
>   mm/internal.h           |   1 +
>   mm/ksm.c                |   5 +-
>   mm/memory-failure.c     | 130 ++++++++++++++++++++--------------------
>   mm/migrate.c            |   2 +-
>   mm/page_vma_mapped.c    |  16 +++--
>   10 files changed, 94 insertions(+), 98 deletions(-)
>


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 04/11] mm: Make page_mapped_in_vma conditional on CONFIG_MEMORY_FAILURE
  2024-04-08 22:52     ` Matthew Wilcox
@ 2024-04-09  6:35       ` Jane Chu
  0 siblings, 0 replies; 48+ messages in thread
From: Jane Chu @ 2024-04-09  6:35 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Miaohe Lin, linux-mm

On 4/8/2024 3:52 PM, Matthew Wilcox wrote:

> On Mon, Apr 08, 2024 at 03:45:37PM -0700, Jane Chu wrote:
>> On 4/8/2024 12:42 PM, Matthew Wilcox (Oracle) wrote:
>>> +#ifdef CONFIG_MEMORY_FAILURE
>>>    /**
>>>     * page_mapped_in_vma - check whether a page is really mapped in a VMA
>>>     * @page: the page to test
>>> @@ -344,3 +345,4 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
>>>    out:
>>>    	return pvmw.address;
>>>    }
>>> +#endif
>> Should below be put in the CONFIG_MEMORY_FAILURE bracket as well ?
>>
>> include/linux/rmap.h: int page_mapped_in_vma(struct page *page, struct
>> vm_area_struct *vma);
> I don't see why we should; I see no advantage to adding that ifdef.
> What would motivate you to add it?

Just to be consistent: either both the function definition and the
function declaration are put inside the config switch bracket, or
neither.  Perhaps this is a nitpick, up to you.

Thanks,

-jane

>
>> with that,
>>
>> Reviewed-by: Jane Chu <jane.chu@oracle.com>
> Thanks!


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 11/11] mm/memory-failure: Pass the folio to collect_procs_ksm()
  2024-04-09  6:27   ` Jane Chu
@ 2024-04-09 12:11     ` Matthew Wilcox
  2024-04-09 15:15       ` Jane Chu
  0 siblings, 1 reply; 48+ messages in thread
From: Matthew Wilcox @ 2024-04-09 12:11 UTC (permalink / raw)
  To: Jane Chu; +Cc: Miaohe Lin, linux-mm

On Mon, Apr 08, 2024 at 11:27:02PM -0700, Jane Chu wrote:
> On 4/8/2024 12:42 PM, Matthew Wilcox (Oracle) wrote:
> > +++ b/include/linux/ksm.h
> > @@ -81,15 +81,9 @@ struct folio *ksm_might_need_to_copy(struct folio *folio,
> >   void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc);
> >   void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
> > -
> > -#ifdef CONFIG_MEMORY_FAILURE
> > -void collect_procs_ksm(struct page *page, struct list_head *to_kill,
> > -		       int force_early);
> > -#endif
> > -
> > -#ifdef CONFIG_PROC_FS
> > +void collect_procs_ksm(struct folio *folio, struct page *page,
> > +		struct list_head *to_kill, int force_early);
> >   long ksm_process_profit(struct mm_struct *);
> > -#endif /* CONFIG_PROC_FS */
> Why is the #ifdef-#endif CONFIG_PROC_FS removed?  In ksm.c,
> ksm_process_profit() is defined within the config switch.

Yes, but there's no need to put ifdefs around function declarations.
All it does is add a rather bogus dependency on the CONFIG symbol.
There's no harm in having a declaration for a function which doesn't
exist.
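
As a minimal, self-contained sketch of the pattern (hypothetical CONFIG
symbol and function name, not from this series), this compiles and links
whether or not the option is defined:

        void frob(void);                /* declaration is always visible */

        #ifdef CONFIG_FROB
        void frob(void)                 /* definition only when enabled */
        {
        }
        #endif

        int main(void)
        {
        #ifdef CONFIG_FROB
                frob();                 /* referenced only when it exists */
        #endif
                return 0;
        }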



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 00/11] Some cleanups for memory-failure
  2024-04-09  6:28 ` [PATCH v2 00/11] Some cleanups for memory-failure Jane Chu
@ 2024-04-09 12:12   ` Matthew Wilcox
  0 siblings, 0 replies; 48+ messages in thread
From: Matthew Wilcox @ 2024-04-09 12:12 UTC (permalink / raw)
  To: Jane Chu; +Cc: Miaohe Lin, linux-mm

On Mon, Apr 08, 2024 at 11:28:26PM -0700, Jane Chu wrote:
> Hi, Matthew,
> 
> What is the series based on?

It's against next-20240408 but shouldn't have any
dependencies outside of branch 'mm-everything' of
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 11/11] mm/memory-failure: Pass the folio to collect_procs_ksm()
  2024-04-09 12:11     ` Matthew Wilcox
@ 2024-04-09 15:15       ` Jane Chu
  0 siblings, 0 replies; 48+ messages in thread
From: Jane Chu @ 2024-04-09 15:15 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Miaohe Lin, linux-mm

On 4/9/2024 5:11 AM, Matthew Wilcox wrote:

> On Mon, Apr 08, 2024 at 11:27:02PM -0700, Jane Chu wrote:
>> On 4/8/2024 12:42 PM, Matthew Wilcox (Oracle) wrote:
>>> +++ b/include/linux/ksm.h
>>> @@ -81,15 +81,9 @@ struct folio *ksm_might_need_to_copy(struct folio *folio,
>>>    void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc);
>>>    void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
>>> -
>>> -#ifdef CONFIG_MEMORY_FAILURE
>>> -void collect_procs_ksm(struct page *page, struct list_head *to_kill,
>>> -		       int force_early);
>>> -#endif
>>> -
>>> -#ifdef CONFIG_PROC_FS
>>> +void collect_procs_ksm(struct folio *folio, struct page *page,
>>> +		struct list_head *to_kill, int force_early);
>>>    long ksm_process_profit(struct mm_struct *);
>>> -#endif /* CONFIG_PROC_FS */
>> Why is the #ifdef-#endif CONFIG_PROC_FS removed?  In ksm.c,
>> ksm_process_profit() is defined within the config switch.
> Yes, but there's no need to put ifdefs around function declarations.
> All it does is add a rather bogus dependency on the CONFIG symbol.
> There's no harm in having a declaration for a function which doesn't
> exist.

I see, sounds good.

Reviewed-by: Jane Chu <jane.chu@oracle.com>

-jane



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 01/11] mm/memory-failure: Remove fsdax_pgoff argument from __add_to_kill
  2024-04-08 19:42 ` [PATCH v2 01/11] mm/memory-failure: Remove fsdax_pgoff argument from __add_to_kill Matthew Wilcox (Oracle)
@ 2024-04-10  9:09   ` Oscar Salvador
  0 siblings, 0 replies; 48+ messages in thread
From: Oscar Salvador @ 2024-04-10  9:09 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: Miaohe Lin, linux-mm, Jane Chu, Dan Williams

On Mon, Apr 08, 2024 at 08:42:19PM +0100, Matthew Wilcox (Oracle) wrote:
> Unify the KSM and DAX codepaths by calculating the addr in
> add_to_kill_fsdax() instead of telling __add_to_kill() to calculate it.
> 
> Acked-by: Miaohe Lin <linmiaohe@huawei.com>
> Reviewed-by: Jane Chu <jane.chu@oracle.com>
> Reviewed-by: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Oscar Salvador <osalvador@suse.de>


-- 
Oscar Salvador
SUSE Labs


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 02/11] mm/memory-failure: Pass addr to __add_to_kill()
  2024-04-08 19:42 ` [PATCH v2 02/11] mm/memory-failure: Pass addr to __add_to_kill() Matthew Wilcox (Oracle)
  2024-04-08 22:32   ` Jane Chu
@ 2024-04-10  9:11   ` Oscar Salvador
  1 sibling, 0 replies; 48+ messages in thread
From: Oscar Salvador @ 2024-04-10  9:11 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: Miaohe Lin, linux-mm, Longlong Xia

On Mon, Apr 08, 2024 at 08:42:20PM +0100, Matthew Wilcox (Oracle) wrote:
> Handle anon/file folios the same way as KSM & DAX folios by passing in
> the address.
> 
> Cc: Longlong Xia <xialonglong1@huawei.com>
> Acked-by: Miaohe Lin <linmiaohe@huawei.com>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Oscar Salvador <osalvador@suse.de>

-- 
Oscar Salvador
SUSE Labs


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 03/11] mm: Return the address from page_mapped_in_vma()
  2024-04-08 19:42 ` [PATCH v2 03/11] mm: Return the address from page_mapped_in_vma() Matthew Wilcox (Oracle)
  2024-04-08 22:38   ` Jane Chu
@ 2024-04-10  9:38   ` Oscar Salvador
  2024-04-11  2:56     ` Miaohe Lin
  2024-04-11 17:37     ` Matthew Wilcox
  1 sibling, 2 replies; 48+ messages in thread
From: Oscar Salvador @ 2024-04-10  9:38 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: Miaohe Lin, linux-mm

On Mon, Apr 08, 2024 at 08:42:21PM +0100, Matthew Wilcox (Oracle) wrote:
> The only user of this function calls page_address_in_vma() immediately
> after page_mapped_in_vma() calculates it and uses it to return true/false.
> Return the address instead, allowing memory-failure to skip the call
> to page_address_in_vma().
> 
> Acked-by: Miaohe Lin <linmiaohe@huawei.com>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/page_vma_mapped.c | 14 ++++++++------
>  1 file changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> index 53b8868ede61..48bfc17934cd 100644
> --- a/mm/page_vma_mapped.c
> +++ b/mm/page_vma_mapped.c
> @@ -319,9 +319,10 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>   * @page: the page to test
>   * @vma: the VMA to test
>   *
> - * Returns 1 if the page is mapped into the page tables of the VMA, 0
> - * if the page is not mapped into the page tables of this VMA.  Only
> - * valid for normal file or anonymous VMAs.
> + * Return: The address the page is mapped at if the page is in the range
> + * covered by the VMA and present in the page table.  If the page is
> + * outside the VMA or not present, returns -EFAULT.
> + * Only valid for normal file or anonymous VMAs.

I am probably missing something here but I am confused.
Now we either return -EFAULT or the address.

But page_mapped_in_vma() gets called from collect_procs_anon() like
this:

if (!page_mapped_in_vma(page, vma))

so that is not going to work the way we want?
Shouldn't that be converted to

 if (page_mapped_in_vma(page, vma) == -EFAULT)

?

-- 
Oscar Salvador
SUSE Labs


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 04/11] mm: Make page_mapped_in_vma conditional on CONFIG_MEMORY_FAILURE
  2024-04-08 19:42 ` [PATCH v2 04/11] mm: Make page_mapped_in_vma conditional on CONFIG_MEMORY_FAILURE Matthew Wilcox (Oracle)
  2024-04-08 22:45   ` Jane Chu
@ 2024-04-10  9:39   ` Oscar Salvador
  1 sibling, 0 replies; 48+ messages in thread
From: Oscar Salvador @ 2024-04-10  9:39 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: Miaohe Lin, linux-mm

On Mon, Apr 08, 2024 at 08:42:22PM +0100, Matthew Wilcox (Oracle) wrote:
> This function is only currently used by the memory-failure code, so
> we can omit it if we're not compiling in the memory-failure code.
> 
> Suggested-by: Miaohe Lin <linmiaohe@huawei.com>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Oscar Salvador <osalvador@suse.de>


-- 
Oscar Salvador
SUSE Labs


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 06/11] mm: Convert hugetlb_page_mapping_lock_write to folio
  2024-04-08 19:42 ` [PATCH v2 06/11] mm: Convert hugetlb_page_mapping_lock_write to folio Matthew Wilcox (Oracle)
  2024-04-08 23:09   ` Jane Chu
@ 2024-04-10  9:52   ` Oscar Salvador
  1 sibling, 0 replies; 48+ messages in thread
From: Oscar Salvador @ 2024-04-10  9:52 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: Miaohe Lin, linux-mm

On Mon, Apr 08, 2024 at 08:42:24PM +0100, Matthew Wilcox (Oracle) wrote:
> The page is only used to get the mapping, so the folio will do just
> as well.  Both callers already have a folio available, so this saves
> a call to compound_head().
> 
> Acked-by: Miaohe Lin <linmiaohe@huawei.com>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Oscar Salvador <osalvador@suse.de>
 

-- 
Oscar Salvador
SUSE Labs


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio
  2024-04-09  0:34   ` Jane Chu
@ 2024-04-10 10:21     ` Oscar Salvador
  2024-04-10 14:23       ` Matthew Wilcox
  2024-04-10 23:15       ` Jane Chu
  0 siblings, 2 replies; 48+ messages in thread
From: Oscar Salvador @ 2024-04-10 10:21 UTC (permalink / raw)
  To: Jane Chu; +Cc: Matthew Wilcox (Oracle), Miaohe Lin, linux-mm

On Mon, Apr 08, 2024 at 05:34:29PM -0700, Jane Chu wrote:
> At this stage,  'p' is not expected to be a large page since a _refcount has
> been taken, right?  So is it reasonable to replace the "if (retry)" block
> with a VM_BUG_ON warning?  otherwise, if 'p' became part of a different
> large folio,  how to recover from here ?

We might have split the THP (if it was part of one), but AFAICS
nothing stops the kernel from coalescing this page again into a new THP
via e.g. madvise(MADV_HUGEPAGE)?


-- 
Oscar Salvador
SUSE Labs


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio
  2024-04-10 10:21     ` Oscar Salvador
@ 2024-04-10 14:23       ` Matthew Wilcox
  2024-04-10 15:30         ` Oscar Salvador
  2024-04-10 23:15       ` Jane Chu
  1 sibling, 1 reply; 48+ messages in thread
From: Matthew Wilcox @ 2024-04-10 14:23 UTC (permalink / raw)
  To: Oscar Salvador; +Cc: Jane Chu, Miaohe Lin, linux-mm

On Wed, Apr 10, 2024 at 12:21:07PM +0200, Oscar Salvador wrote:
> On Mon, Apr 08, 2024 at 05:34:29PM -0700, Jane Chu wrote:
> > At this stage,  'p' is not expected to be a large page since a _refcount has
> > been taken, right?  So is it reasonable to replace the "if (retry)" block
> > with a VM_BUG_ON warning?  otherwise, if 'p' became part of a different
> > large folio,  how to recover from here ?
> 
> We might have split the THP (if it was part of), but AFAICS
> nothing stops the kernel to coallesce this page again into a new THP via e.g:
> madvise(MADV_HUGEPAGE) ?

Won't that allocate a _new_ THP rather than coalescing in place?


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio
  2024-04-10 14:23       ` Matthew Wilcox
@ 2024-04-10 15:30         ` Oscar Salvador
  0 siblings, 0 replies; 48+ messages in thread
From: Oscar Salvador @ 2024-04-10 15:30 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Jane Chu, Miaohe Lin, linux-mm

On Wed, Apr 10, 2024 at 03:23:10PM +0100, Matthew Wilcox wrote:
> On Wed, Apr 10, 2024 at 12:21:07PM +0200, Oscar Salvador wrote:
> > On Mon, Apr 08, 2024 at 05:34:29PM -0700, Jane Chu wrote:
> > > At this stage,  'p' is not expected to be a large page since a _refcount has
> > > been taken, right?  So is it reasonable to replace the "if (retry)" block
> > > with a VM_BUG_ON warning?  otherwise, if 'p' became part of a different
> > > large folio,  how to recover from here ?
> > 
> > We might have split the THP (if it was part of), but AFAICS
> > nothing stops the kernel to coallesce this page again into a new THP via e.g:
> > madvise(MADV_HUGEPAGE) ?
> 
> Won't that allocate a _new_ THP rather than coalescing in place?

Sorry, yes, I think it will. I guess I meant MADV_COLLAPSE.
My point is that somehow this page can be used to form a new THP, or is
there anything stopping the page from becoming part of a compound page again?


-- 
Oscar Salvador
SUSE Labs


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio
  2024-04-10 10:21     ` Oscar Salvador
  2024-04-10 14:23       ` Matthew Wilcox
@ 2024-04-10 23:15       ` Jane Chu
  2024-04-11  1:27         ` Jane Chu
  1 sibling, 1 reply; 48+ messages in thread
From: Jane Chu @ 2024-04-10 23:15 UTC (permalink / raw)
  To: Oscar Salvador; +Cc: Matthew Wilcox (Oracle), Miaohe Lin, linux-mm

On 4/10/2024 3:21 AM, Oscar Salvador wrote:

> On Mon, Apr 08, 2024 at 05:34:29PM -0700, Jane Chu wrote:
>> At this stage,  'p' is not expected to be a large page since a _refcount has
>> been taken, right?  So is it reasonable to replace the "if (retry)" block
>> with a VM_BUG_ON warning?  otherwise, if 'p' became part of a different
>> large folio,  how to recover from here ?
> We might have split the THP (if it was part of), but AFAICS
> nothing stops the kernel to coallesce this page again into a new THP via e.g:
> madvise(MADV_HUGEPAGE) ?

Good point, thanks!

-jane

>


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio
  2024-04-10 23:15       ` Jane Chu
@ 2024-04-11  1:27         ` Jane Chu
  2024-04-11  1:51           ` Matthew Wilcox
  0 siblings, 1 reply; 48+ messages in thread
From: Jane Chu @ 2024-04-11  1:27 UTC (permalink / raw)
  To: Oscar Salvador; +Cc: Matthew Wilcox (Oracle), Miaohe Lin, linux-mm

On 4/10/2024 4:15 PM, Jane Chu wrote:

> On 4/10/2024 3:21 AM, Oscar Salvador wrote:
>
>> On Mon, Apr 08, 2024 at 05:34:29PM -0700, Jane Chu wrote:
>>> At this stage,  'p' is not expected to be a large page since a 
>>> _refcount has
>>> been taken, right?  So is it reasonable to replace the "if (retry)" 
>>> block
>>> with a VM_BUG_ON warning?  otherwise, if 'p' became part of a different
>>> large folio,  how to recover from here ?
>> We might have split the THP (if it was part of), but AFAICS
>> nothing stops the kernel to coallesce this page again into a new THP 
>> via e.g:
>> madvise(MADV_HUGEPAGE) ?
>
> Good point, thanks!

Come to think of it a bit more, maybe the check could be

     if (folio_test_large(folio) || (folio != page_folio(p))) {

? Because 'p' could have become part of a THP after shake_folio() and 
before folio_lock(), right?

thanks,

-jane


>
> -jane
>
>>


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio
  2024-04-11  1:27         ` Jane Chu
@ 2024-04-11  1:51           ` Matthew Wilcox
  2024-04-11  9:00             ` Miaohe Lin
  0 siblings, 1 reply; 48+ messages in thread
From: Matthew Wilcox @ 2024-04-11  1:51 UTC (permalink / raw)
  To: Jane Chu; +Cc: Oscar Salvador, Miaohe Lin, linux-mm

On Wed, Apr 10, 2024 at 06:27:38PM -0700, Jane Chu wrote:
> On 4/10/2024 4:15 PM, Jane Chu wrote:
> 
> > On 4/10/2024 3:21 AM, Oscar Salvador wrote:
> > 
> > > On Mon, Apr 08, 2024 at 05:34:29PM -0700, Jane Chu wrote:
> > > > At this stage,  'p' is not expected to be a large page since a
> > > > _refcount has
> > > > been taken, right?  So is it reasonable to replace the "if
> > > > (retry)" block
> > > > with a VM_BUG_ON warning?  otherwise, if 'p' became part of a different
> > > > large folio,  how to recover from here ?
> > > We might have split the THP (if it was part of), but AFAICS
> > > nothing stops the kernel to coallesce this page again into a new THP
> > > via e.g:
> > > madvise(MADV_HUGEPAGE) ?
> > 
> > Good point, thanks!
> 
> Come to think of it a bit more, maybe the check could be
> 
>     if (folio_test_large(folio) || (folio != page_folio(p))) {
> 
> ? Because, 'p' could have become part of a THP after shake_folio() and
> before folio_lock(), right?

What is the mechanism for reassembling a large folio?  I don't see that
code anywhere.


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 03/11] mm: Return the address from page_mapped_in_vma()
  2024-04-10  9:38   ` Oscar Salvador
@ 2024-04-11  2:56     ` Miaohe Lin
  2024-04-11 17:37     ` Matthew Wilcox
  1 sibling, 0 replies; 48+ messages in thread
From: Miaohe Lin @ 2024-04-11  2:56 UTC (permalink / raw)
  To: Oscar Salvador, Matthew Wilcox (Oracle); +Cc: linux-mm

On 2024/4/10 17:38, Oscar Salvador wrote:
> On Mon, Apr 08, 2024 at 08:42:21PM +0100, Matthew Wilcox (Oracle) wrote:
>> The only user of this function calls page_address_in_vma() immediately
>> after page_mapped_in_vma() calculates it and uses it to return true/false.
>> Return the address instead, allowing memory-failure to skip the call
>> to page_address_in_vma().
>>
>> Acked-by: Miaohe Lin <linmiaohe@huawei.com>
>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>> ---
>>  mm/page_vma_mapped.c | 14 ++++++++------
>>  1 file changed, 8 insertions(+), 6 deletions(-)
>>
>> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
>> index 53b8868ede61..48bfc17934cd 100644
>> --- a/mm/page_vma_mapped.c
>> +++ b/mm/page_vma_mapped.c
>> @@ -319,9 +319,10 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>>   * @page: the page to test
>>   * @vma: the VMA to test
>>   *
>> - * Returns 1 if the page is mapped into the page tables of the VMA, 0
>> - * if the page is not mapped into the page tables of this VMA.  Only
>> - * valid for normal file or anonymous VMAs.
>> + * Return: The address the page is mapped at if the page is in the range
>> + * covered by the VMA and present in the page table.  If the page is
>> + * outside the VMA or not present, returns -EFAULT.
>> + * Only valid for normal file or anonymous VMAs.
> 
> I am probably missing something here but I am confused.
> Now we either return -EFAULT or the address.
> 
> But page_vma_mapped_walk() gets called from collect_procs_anon() like
> this:
> 
> if (!page_mapped_in_vma(page, vma))
> 
> so that is not gonna work the way we want?
> Should not that be converted to
> 
>  if (page_mapped_in_vma(page, vma) == -EFAULT)
> 
> ?

It seems this patch is incomplete. It forgets to change the caller parts as it once did at [1].
Or am I missing something too?

[1] https://www.spinics.net/lists/linux-mm/msg375668.html

Thanks.
.

> 



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio
  2024-04-11  1:51           ` Matthew Wilcox
@ 2024-04-11  9:00             ` Miaohe Lin
  2024-04-11 11:23               ` Oscar Salvador
  2024-04-12 19:48               ` Matthew Wilcox
  0 siblings, 2 replies; 48+ messages in thread
From: Miaohe Lin @ 2024-04-11  9:00 UTC (permalink / raw)
  To: Matthew Wilcox, Jane Chu, Oscar Salvador; +Cc: linux-mm

On 2024/4/11 9:51, Matthew Wilcox wrote:
> On Wed, Apr 10, 2024 at 06:27:38PM -0700, Jane Chu wrote:
>> On 4/10/2024 4:15 PM, Jane Chu wrote:
>>
>>> On 4/10/2024 3:21 AM, Oscar Salvador wrote:
>>>
>>>> On Mon, Apr 08, 2024 at 05:34:29PM -0700, Jane Chu wrote:
>>>>> At this stage,  'p' is not expected to be a large page since a
>>>>> _refcount has
>>>>> been taken, right?  So is it reasonable to replace the "if
>>>>> (retry)" block
>>>>> with a VM_BUG_ON warning?  otherwise, if 'p' became part of a different
>>>>> large folio,  how to recover from here ?
>>>> We might have split the THP (if it was part of), but AFAICS
>>>> nothing stops the kernel to coallesce this page again into a new THP
>>>> via e.g:
>>>> madvise(MADV_HUGEPAGE) ?
>>>
>>> Good point, thanks!
>>
>> Come to think of it a bit more, maybe the check could be
>>
>>     if (folio_test_large(folio) || (folio != page_folio(p))) {
>>
>> ? Because, 'p' could have become part of a THP after shake_folio() and
>> before folio_lock(), right?

This check was originally introduced via commit:

"""
commit f37d4298aa7f8b74395aa13c728677e2ed86fdaf
Author: Andi Kleen <ak@xxxxxxxxxxxxxxx>
Date:   Wed Aug 6 16:06:49 2014 -0700

    hwpoison: fix race with changing page during offlining

    When a hwpoison page is locked it could change state due to parallel
    modifications.  The original compound page can be torn down and then
    this 4k page becomes part of a differently-size compound page is is a
    standalone regular page.

    Check after the lock if the page is still the same compound page.

    We could go back, grab the new head page and try again but it should be
    quite rare, so I thought this was safest.  A retry loop would be more
    difficult to test and may have more side effects.

    The hwpoison code by design only tries to handle cases that are
    reasonably common in workloads, as visible in page-flags.

    I'm not really that concerned about handling this (likely rare case),
    just not crashing on it.

    Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
    Acked-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
    Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
    Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a013bc94ebbe..44c6bd201d3a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1172,6 +1172,16 @@ int memory_failure(unsigned long pfn, int trapno, int flags)

 	lock_page(hpage);

+	/*
+	 * The page could have changed compound pages during the locking.
+	 * If this happens just bail out.
+	 */
+	if (compound_head(p) != hpage) {
+		action_result(pfn, "different compound page after locking", IGNORED);
+		res = -EBUSY;
+		goto out;
+	}
+
 	/*
 	 * We use page flags to determine what action should be taken, but
 	 * the flags can be modified by the error containment action.  One

"""

And the discussion can be found at [1]. The root cause of the issue is indeterminate, but most probably:

[1] https://lore.kernel.org/linux-mm/20140626195036.GA5311@nhori.redhat.com/

"""
And I think that the problem you report is caused by another part of hwpoison
code, because we have PageSlab check at the beginning of hwpoison_user_mappings(),
so if LRU page truned into slab page just before locking the page, we never
reach try_to_unmap().
I think this was caused by the code around lock migration after thp split
in hwpoison_user_mappings(), which was introduced in commit 54b9dd14d09f
("mm/memory-failure.c: shift page lock from head page to tail page after thp split").
I guess the tail page (raw error page) was freed and turned into Slab page
just after thp split and before locking the error page.
So possible solution is to do page status check again after thp split.
"""

IIUC, it's because the page lock is shifted from @hpage to @p. So there's a window
in which p is freed and turned into a Slab page.

@@ -942,11 +942,14 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
                         * We pinned the head page for hwpoison handling,
                         * now we split the thp and we are interested in
                         * the hwpoisoned raw page, so move the refcount
-                        * to it.
+                        * to it. Similarly, page lock is shifted.
                         */
                        if (hpage != p) {
                                put_page(hpage);
                                get_page(p);
+                               lock_page(p);
+                               unlock_page(hpage);
+                               *hpagep = p;
                        }
                        /* THP is split, so ppage should be the real poisoned page. */
                        ppage = p;

> 
> What is the mechanism for reassembling a large folio?  I don't see that
> code anywhere.

But as the code changed, the above page lock shift is gone. And I think the logic below can't
trigger now, as we hold an extra page refcount so the page can't be coalesced into a new THP or Slab page.

	/*
	 * We're only intended to deal with the non-Compound page here.
	 * However, the page could have changed compound pages due to
	 * race window. If this happens, we could try again to hopefully
	 * handle the page next round.
	 */
	if (PageCompound(p)) {
		if (retry) {
			ClearPageHWPoison(p);
			unlock_page(p);
			put_page(p);
			flags &= ~MF_COUNT_INCREASED;
			retry = false;
			goto try_again;
		}
		res = action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
		goto unlock_page;
	}

So it might be better to replace the above code block with a WARN_ON(PageCompound(p)) and remove the MF_MSG_DIFFERENT_COMPOUND case,
roughly along the lines of the sketch below. Any thoughts?
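
A rough sketch of that replacement (untested; MF_MSG_UNKNOWN is just a
placeholder action type I picked for the bail-out path):

        /*
         * With the extra refcount held, this page should no longer be
         * able to become part of a compound page, so treat it as a bug
         * rather than a retryable race.
         */
        if (WARN_ON_ONCE(PageCompound(p))) {
                res = action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
                goto unlock_page;
        }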

Thanks.
.

> 
> .
> 



^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio
  2024-04-11  9:00             ` Miaohe Lin
@ 2024-04-11 11:23               ` Oscar Salvador
  2024-04-11 12:17                 ` Matthew Wilcox
  2024-04-12 19:48               ` Matthew Wilcox
  1 sibling, 1 reply; 48+ messages in thread
From: Oscar Salvador @ 2024-04-11 11:23 UTC (permalink / raw)
  To: Miaohe Lin; +Cc: Matthew Wilcox, Jane Chu, linux-mm

On Thu, Apr 11, 2024 at 05:00:33PM +0800, Miaohe Lin wrote:
> But as code changes, the above page lock shift is gone. And I think below logic can't
> trigger now. As we hold extra page refcnt so page can't be coallesced into a new THP or Slab page.

So I was wrong.
I did not check the slab part, but I did check the code that tries to
coalesce pages.
We have the following check in hpage_collapse_scan_pmd():

 /*
  *
  * Here the check may be racy:
  * it may see total_mapcount > refcount in some cases?
  * But such case is ephemeral we could always retry collapse
  * later.  However it may report false positive if the page
  * has excessive GUP pins (i.e. 512).  Anyway the same check
  * will be done again later the risk seems low.
  */
  if (!is_refcount_suitable(folio)) {
          result = SCAN_PAGE_COUNT;
          goto out_unmap;
  }

So I guess that since we hold an extra refcount from memory-failure
code, this page cannot be part of a THP anymore.
If we confirm that the same goes for slab, I would replace that with
your warn check.
In this way, if we ever trigger that warn path, at least we will know
that the possibility exists.

So it is a good way to get rid of false assumptions and code burden.


-- 
Oscar Salvador
SUSE Labs


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio
  2024-04-11 11:23               ` Oscar Salvador
@ 2024-04-11 12:17                 ` Matthew Wilcox
  0 siblings, 0 replies; 48+ messages in thread
From: Matthew Wilcox @ 2024-04-11 12:17 UTC (permalink / raw)
  To: Oscar Salvador; +Cc: Miaohe Lin, Jane Chu, linux-mm

On Thu, Apr 11, 2024 at 01:23:43PM +0200, Oscar Salvador wrote:
> On Thu, Apr 11, 2024 at 05:00:33PM +0800, Miaohe Lin wrote:
> > But as code changes, the above page lock shift is gone. And I think below logic can't
> > trigger now. As we hold extra page refcnt so page can't be coallesced into a new THP or Slab page.
> 
> So I was wrong.
> I did not check the slap part, but I did check the code that tries to
> coallesce pages.
> We have the following check in hpage_collapse_scan_pmd():

Even if that check succeeds, collapse_huge_page() works by allocating a
new THP, copying the data into it and freeing the existing pages.  It
doesn't reassemble a THP in-place.  I cannot find anywhere that will
reassemble smaller folios into larger ones (other than the page
allocator, and if you have a refcount on a folio, it can't possibly go
into the page allocator).


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 03/11] mm: Return the address from page_mapped_in_vma()
  2024-04-10  9:38   ` Oscar Salvador
  2024-04-11  2:56     ` Miaohe Lin
@ 2024-04-11 17:37     ` Matthew Wilcox
  1 sibling, 0 replies; 48+ messages in thread
From: Matthew Wilcox @ 2024-04-11 17:37 UTC (permalink / raw)
  To: Oscar Salvador; +Cc: Miaohe Lin, linux-mm

On Wed, Apr 10, 2024 at 11:38:36AM +0200, Oscar Salvador wrote:
> On Mon, Apr 08, 2024 at 08:42:21PM +0100, Matthew Wilcox (Oracle) wrote:
> > The only user of this function calls page_address_in_vma() immediately
> > after page_mapped_in_vma() calculates it and uses it to return true/false.
> > Return the address instead, allowing memory-failure to skip the call
> > to page_address_in_vma().
> > 
> > Acked-by: Miaohe Lin <linmiaohe@huawei.com>
> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > ---
> >  mm/page_vma_mapped.c | 14 ++++++++------
> >  1 file changed, 8 insertions(+), 6 deletions(-)
> > 
> > diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> > index 53b8868ede61..48bfc17934cd 100644
> > --- a/mm/page_vma_mapped.c
> > +++ b/mm/page_vma_mapped.c
> > @@ -319,9 +319,10 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
> >   * @page: the page to test
> >   * @vma: the VMA to test
> >   *
> > - * Returns 1 if the page is mapped into the page tables of the VMA, 0
> > - * if the page is not mapped into the page tables of this VMA.  Only
> > - * valid for normal file or anonymous VMAs.
> > + * Return: The address the page is mapped at if the page is in the range
> > + * covered by the VMA and present in the page table.  If the page is
> > + * outside the VMA or not present, returns -EFAULT.
> > + * Only valid for normal file or anonymous VMAs.
> 
> I am probably missing something here but I am confused.
> Now we either return -EFAULT or the address.

I don't know where the other hunks from this patch went.  Going back
through git reflog, they're missing since the very first version applied
to the tree they're currently part of (using git-am, well actually b4,
I think).  I'll resurrect them from the original posting and resend the
whole patchset.
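
For reference, the caller side needs roughly this shape (a sketch of the
missing conversion in collect_procs_anon(), not the exact hunk):

        unsigned long addr = page_mapped_in_vma(page, vma);

        /* -EFAULT means the page is not mapped in this VMA; skip it */
        if (addr == -EFAULT)
                continue;
        add_to_kill_anon_file(tsk, page, vma, to_kill, addr);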


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio
  2024-04-11  9:00             ` Miaohe Lin
  2024-04-11 11:23               ` Oscar Salvador
@ 2024-04-12 19:48               ` Matthew Wilcox
  2024-04-12 22:09                 ` Oscar Salvador
                                   ` (2 more replies)
  1 sibling, 3 replies; 48+ messages in thread
From: Matthew Wilcox @ 2024-04-12 19:48 UTC (permalink / raw)
  To: Miaohe Lin; +Cc: Jane Chu, Oscar Salvador, linux-mm

On Thu, Apr 11, 2024 at 05:00:33PM +0800, Miaohe Lin wrote:
> But as code changes, the above page lock shift is gone. And I think below logic can't
> trigger now. As we hold extra page refcnt so page can't be coallesced into a new THP or Slab page.
> 
> 	/*
> 	 * We're only intended to deal with the non-Compound page here.
> 	 * However, the page could have changed compound pages due to
> 	 * race window. If this happens, we could try again to hopefully
> 	 * handle the page next round.
> 	 */
> 	if (PageCompound(p)) {
> 		if (retry) {
> 			ClearPageHWPoison(p);
> 			unlock_page(p);
> 			put_page(p);
> 			flags &= ~MF_COUNT_INCREASED;
> 			retry = false;
> 			goto try_again;
> 		}
> 		res = action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
> 		goto unlock_page;
> 	}
> 
> So it might be better to replace above code block as WARN_ON(PageCompound(p)) and remove MF_MSG_DIFFERENT_COMPOUND case.
> Any thoughts?

Yes, I think you're right.  As the MM handling of pages has evolved,
people haven't kept memory-failure uptodate.  That's both understandable
and regrettable.

I don't have the time to focus on memory-failure myself; I have a couple
of hundred uses of page->mapping to eliminate.  And I'd want to get a
lot more serious about testing before starting on that journey.

I do have ideas for handling hwpoison without splitting a folio.
I'd also really like to improve memory-failure to handle sub-page-size
blast radius (Intel CXL used to have a blast radius of 256 bytes).
But realistically, these things are never going to rise high enough on
my todo list to actually get done.


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio
  2024-04-12 19:48               ` Matthew Wilcox
@ 2024-04-12 22:09                 ` Oscar Salvador
  2024-04-15 18:47                 ` Jane Chu
  2024-04-16  9:13                 ` Miaohe Lin
  2 siblings, 0 replies; 48+ messages in thread
From: Oscar Salvador @ 2024-04-12 22:09 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Miaohe Lin, Jane Chu, linux-mm

On Fri, Apr 12, 2024 at 08:48:11PM +0100, Matthew Wilcox wrote:
> Yes, I think you're right.  As the MM handling of pages has evolved,
> people haven't kept memory-failure uptodate.  That's both understandable
> and regrettable.

We kind of had the same problem with memory-hotplug, but we managed to
get it up to date.

> I don't have the time to focus on memory-failure myself; I have a couple
> of hundred uses of page->mapping to eliminate.  And I'd want to get a
> lot more serious about testing before starting on that journey.
> 
> I do have ideas for handling hwpoison without splitting a folio.

But if you want to recover those pages within a large folio that are
unaffected, you will have to split it eventually?
And so, if we can make sure that the large folio has been split, and the
subpage cannot be part of another compound page afterwards, we should be
safe?

But yes, I can see why.
Right now, if we fail to split the folio we do not handle the situation
at all; we just mark the head as HWPoisoned, which is pretty unfortunate
because we just lost the chance to contain the error in the subpage,
so we flushed a large folio down the toilet.

I would be interested to hear those ideas, as having the chance to
handle that would be very beneficial, and more deterministic.


Thanks


-- 
Oscar Salvador
SUSE Labs


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio
  2024-04-12 19:48               ` Matthew Wilcox
  2024-04-12 22:09                 ` Oscar Salvador
@ 2024-04-15 18:47                 ` Jane Chu
  2024-04-16  9:13                 ` Miaohe Lin
  2 siblings, 0 replies; 48+ messages in thread
From: Jane Chu @ 2024-04-15 18:47 UTC (permalink / raw)
  To: Matthew Wilcox, Miaohe Lin; +Cc: Oscar Salvador, linux-mm, Jane Chu

On 4/12/2024 12:48 PM, Matthew Wilcox wrote:

> On Thu, Apr 11, 2024 at 05:00:33PM +0800, Miaohe Lin wrote:
>> But as code changes, the above page lock shift is gone. And I think below logic can't
>> trigger now. As we hold extra page refcnt so page can't be coallesced into a new THP or Slab page.
>>
>> 	/*
>> 	 * We're only intended to deal with the non-Compound page here.
>> 	 * However, the page could have changed compound pages due to
>> 	 * race window. If this happens, we could try again to hopefully
>> 	 * handle the page next round.
>> 	 */
>> 	if (PageCompound(p)) {
>> 		if (retry) {
>> 			ClearPageHWPoison(p);
>> 			unlock_page(p);
>> 			put_page(p);
>> 			flags &= ~MF_COUNT_INCREASED;
>> 			retry = false;
>> 			goto try_again;
>> 		}
>> 		res = action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
>> 		goto unlock_page;
>> 	}
>>
>> So it might be better to replace above code block as WARN_ON(PageCompound(p)) and remove MF_MSG_DIFFERENT_COMPOUND case.
>> Any thoughts?
> Yes, I think you're right.  As the MM handling of pages has evolved,
> people haven't kept memory-failure uptodate.  That's both understandable
> and regrettable.
>
> I don't have the time to focus on memory-failure myself; I have a couple
> of hundred uses of page->mapping to eliminate.  And I'd want to get a
> lot more serious about testing before starting on that journey.
>
> I do have ideas for handling hwpoison without splitting a folio.
> I'd also really like to improve memory-failure to handle sub-page-size
> blast radius (Intel CXL used to have a blast radius of 256 bytes).
> But realistically, these things are never going to rise high enough on
> my todo list to actually get done.

Yeah, glad to have the issue identified, that's progress.

I'm working on a series that handles hwpoison without splitting
a THP; I will post it for comments.

thanks,

-jane




^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio
  2024-04-12 19:48               ` Matthew Wilcox
  2024-04-12 22:09                 ` Oscar Salvador
  2024-04-15 18:47                 ` Jane Chu
@ 2024-04-16  9:13                 ` Miaohe Lin
  2 siblings, 0 replies; 48+ messages in thread
From: Miaohe Lin @ 2024-04-16  9:13 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Jane Chu, Oscar Salvador, linux-mm

On 2024/4/13 3:48, Matthew Wilcox wrote:
> On Thu, Apr 11, 2024 at 05:00:33PM +0800, Miaohe Lin wrote:
>> But as code changes, the above page lock shift is gone. And I think below logic can't
>> trigger now. As we hold extra page refcnt so page can't be coallesced into a new THP or Slab page.
>>
>> 	/*
>> 	 * We're only intended to deal with the non-Compound page here.
>> 	 * However, the page could have changed compound pages due to
>> 	 * race window. If this happens, we could try again to hopefully
>> 	 * handle the page next round.
>> 	 */
>> 	if (PageCompound(p)) {
>> 		if (retry) {
>> 			ClearPageHWPoison(p);
>> 			unlock_page(p);
>> 			put_page(p);
>> 			flags &= ~MF_COUNT_INCREASED;
>> 			retry = false;
>> 			goto try_again;
>> 		}
>> 		res = action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
>> 		goto unlock_page;
>> 	}
>>
>> So it might be better to replace above code block as WARN_ON(PageCompound(p)) and remove MF_MSG_DIFFERENT_COMPOUND case.
>> Any thoughts?
> 
> Yes, I think you're right.  As the MM handling of pages has evolved,
> people haven't kept memory-failure uptodate.  That's both understandable
> and regrettable.
> 
> I don't have the time to focus on memory-failure myself; I have a couple
> of hundred uses of page->mapping to eliminate.  And I'd want to get a
> lot more serious about testing before starting on that journey.

I will do those cleanups when I am free. I want to make sure there wasn't
something I missed before starting. :)

> 
> I do have ideas for handling hwpoison without splitting a folio.
> I'd also really like to improve memory-failure to handle sub-page-size
> blast radius (Intel CXL used to have a blast radius of 256 bytes).
> But realistically, these things are never going to rise high enough on
> my todo list to actually get done.

Thanks for your hard work.
.

> .
> 



^ permalink raw reply	[flat|nested] 48+ messages in thread

end of thread, other threads:[~2024-04-16  9:13 UTC | newest]

Thread overview: 48+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-08 19:42 [PATCH v2 00/11] Some cleanups for memory-failure Matthew Wilcox (Oracle)
2024-04-08 19:42 ` [PATCH v2 01/11] mm/memory-failure: Remove fsdax_pgoff argument from __add_to_kill Matthew Wilcox (Oracle)
2024-04-10  9:09   ` Oscar Salvador
2024-04-08 19:42 ` [PATCH v2 02/11] mm/memory-failure: Pass addr to __add_to_kill() Matthew Wilcox (Oracle)
2024-04-08 22:32   ` Jane Chu
2024-04-10  9:11   ` Oscar Salvador
2024-04-08 19:42 ` [PATCH v2 03/11] mm: Return the address from page_mapped_in_vma() Matthew Wilcox (Oracle)
2024-04-08 22:38   ` Jane Chu
2024-04-10  9:38   ` Oscar Salvador
2024-04-11  2:56     ` Miaohe Lin
2024-04-11 17:37     ` Matthew Wilcox
2024-04-08 19:42 ` [PATCH v2 04/11] mm: Make page_mapped_in_vma conditional on CONFIG_MEMORY_FAILURE Matthew Wilcox (Oracle)
2024-04-08 22:45   ` Jane Chu
2024-04-08 22:52     ` Matthew Wilcox
2024-04-09  6:35       ` Jane Chu
2024-04-10  9:39   ` Oscar Salvador
2024-04-08 19:42 ` [PATCH v2 05/11] mm/memory-failure: Convert shake_page() to shake_folio() Matthew Wilcox (Oracle)
2024-04-08 22:53   ` Jane Chu
2024-04-08 19:42 ` [PATCH v2 06/11] mm: Convert hugetlb_page_mapping_lock_write to folio Matthew Wilcox (Oracle)
2024-04-08 23:09   ` Jane Chu
2024-04-10  9:52   ` Oscar Salvador
2024-04-08 19:42 ` [PATCH v2 07/11] mm/memory-failure: Convert memory_failure() to use a folio Matthew Wilcox (Oracle)
2024-04-09  0:34   ` Jane Chu
2024-04-10 10:21     ` Oscar Salvador
2024-04-10 14:23       ` Matthew Wilcox
2024-04-10 15:30         ` Oscar Salvador
2024-04-10 23:15       ` Jane Chu
2024-04-11  1:27         ` Jane Chu
2024-04-11  1:51           ` Matthew Wilcox
2024-04-11  9:00             ` Miaohe Lin
2024-04-11 11:23               ` Oscar Salvador
2024-04-11 12:17                 ` Matthew Wilcox
2024-04-12 19:48               ` Matthew Wilcox
2024-04-12 22:09                 ` Oscar Salvador
2024-04-15 18:47                 ` Jane Chu
2024-04-16  9:13                 ` Miaohe Lin
2024-04-08 19:42 ` [PATCH v2 08/11] mm/memory-failure: Convert hwpoison_user_mappings to take " Matthew Wilcox (Oracle)
2024-04-09  6:15   ` Jane Chu
2024-04-08 19:42 ` [PATCH v2 09/11] mm/memory-failure: Add some folio conversions to unpoison_memory Matthew Wilcox (Oracle)
2024-04-09  6:17   ` Jane Chu
2024-04-08 19:42 ` [PATCH v2 10/11] mm/memory-failure: Use folio functions throughout collect_procs() Matthew Wilcox (Oracle)
2024-04-09  6:19   ` Jane Chu
2024-04-08 19:42 ` [PATCH v2 11/11] mm/memory-failure: Pass the folio to collect_procs_ksm() Matthew Wilcox (Oracle)
2024-04-09  6:27   ` Jane Chu
2024-04-09 12:11     ` Matthew Wilcox
2024-04-09 15:15       ` Jane Chu
2024-04-09  6:28 ` [PATCH v2 00/11] Some cleanups for memory-failure Jane Chu
2024-04-09 12:12   ` Matthew Wilcox
