[RFC,v1,09/11] mm: hwpoison: apply buddy page handling code to hard-offline
diff mbox series

Message ID 1541746035-13408-10-git-send-email-n-horiguchi@ah.jp.nec.com
State New
Headers show
Series
  • hwpoison improvement part 1
Related show

Commit Message

Naoya Horiguchi Nov. 9, 2018, 6:47 a.m. UTC
Hard-offline of free buddy pages can be handled in the same manner as
soft-offline. So this patch applies the new semantics to hard-offline to
achieve more complete isolation of the offlined page. As a result, the
successful case now deserves MF_RECOVERED instead of MF_DELAYED, so this
patch changes that as well.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
---
 mm/memory-failure.c | 38 ++++++++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 10 deletions(-)

Patch
diff mbox series

diff --git v4.19-mmotm-2018-10-30-16-08/mm/memory-failure.c v4.19-mmotm-2018-10-30-16-08_patched/mm/memory-failure.c
index ecafd4a..af541141 100644
--- v4.19-mmotm-2018-10-30-16-08/mm/memory-failure.c
+++ v4.19-mmotm-2018-10-30-16-08_patched/mm/memory-failure.c
@@ -772,6 +772,16 @@  static int me_swapcache_clean(struct page *p, unsigned long pfn)
 		return MF_FAILED;
 }
 
+static int me_huge_free_page(struct page *p)
+{
+	int rc = dissolve_free_huge_page(p);
+
+	if (!rc && set_hwpoison_free_buddy_page(p))
+		return MF_RECOVERED;
+	else
+		return MF_FAILED;
+}
+
 /*
  * Huge pages. Needs work.
  * Issues:
@@ -799,8 +809,7 @@  static int me_huge_page(struct page *p, unsigned long pfn)
 		 */
 		if (PageAnon(hpage))
 			put_page(hpage);
-		dissolve_free_huge_page(p);
-		res = MF_RECOVERED;
+		res = me_huge_free_page(p);
 		lock_page(hpage);
 	}
 
@@ -1108,8 +1117,11 @@  static int memory_failure_hugetlb(unsigned long pfn, int flags)
 			}
 		}
 		unlock_page(head);
-		dissolve_free_huge_page(p);
-		action_result(pfn, MF_MSG_FREE_HUGE, MF_DELAYED);
+
+		res = me_huge_free_page(p);
+		if (res == MF_FAILED)
+			num_poisoned_pages_dec();
+		action_result(pfn, MF_MSG_FREE_HUGE, res);
 		return 0;
 	}
 
@@ -1270,6 +1282,13 @@  int memory_failure(unsigned long pfn, int flags)
 	p = pfn_to_page(pfn);
 	if (PageHuge(p))
 		return memory_failure_hugetlb(pfn, flags);
+
+	if (set_hwpoison_free_buddy_page(p)) {
+		action_result(pfn, MF_MSG_BUDDY, MF_RECOVERED);
+		num_poisoned_pages_inc();
+		return 0;
+	}
+
 	if (TestSetPageHWPoison(p)) {
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 			pfn);
@@ -1281,8 +1300,7 @@  int memory_failure(unsigned long pfn, int flags)
 
 	/*
 	 * We need/can do nothing about count=0 pages.
-	 * 1) it's a free page, and therefore in safe hand:
-	 *    prep_new_page() will be the gate keeper.
+	 * 1) it's a free page, and removed from buddy allocator.
 	 * 2) it's part of a non-compound high order page.
 	 *    Implies some kernel user: cannot stop them from
 	 *    R/W the page; let's pray that the page has been
@@ -1291,8 +1309,8 @@  int memory_failure(unsigned long pfn, int flags)
 	 * that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
 	 */
 	if (!get_hwpoison_page(p)) {
-		if (is_free_buddy_page(p)) {
-			action_result(pfn, MF_MSG_BUDDY, MF_DELAYED);
+		if (set_hwpoison_free_buddy_page(p)) {
+			action_result(pfn, MF_MSG_BUDDY, MF_RECOVERED);
 			return 0;
 		} else {
 			action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
@@ -1330,8 +1348,8 @@  int memory_failure(unsigned long pfn, int flags)
 	 */
 	shake_page(p, 0);
 	/* shake_page could have turned it free. */
-	if (!PageLRU(p) && is_free_buddy_page(p)) {
-		action_result(pfn, MF_MSG_BUDDY_2ND, MF_DELAYED);
+	if (!PageLRU(p) && set_hwpoison_free_buddy_page(p)) {
+		action_result(pfn, MF_MSG_BUDDY_2ND, MF_RECOVERED);
 		return 0;
 	}