linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Aili Yao <yaoaili@kingsoft.com>
To: "David Hildenbrand" <david@redhat.com>,
	"HORIGUCHI NAOYA( 堀口 直也)" <naoya.horiguchi@nec.com>,
	"Matthew Wilcox" <willy@infradead.org>,
	"akpm@linux-foundation.org" <akpm@linux-foundation.org>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"yangfeng1@kingsoft.com" <yangfeng1@kingsoft.com>,
	"sunhao2@kingsoft.com" <sunhao2@kingsoft.com>,
	Oscar Salvador <osalvador@suse.de>,
	Mike Kravetz <mike.kravetz@oracle.com>, <yaoaili@kingsoft.com>
Subject: [PATCH v7] mm/gup: check page hwpoison status for memory recovery failures.
Date: Tue, 6 Apr 2021 10:41:23 +0800	[thread overview]
Message-ID: <20210406104123.451ee3c3@alex-virtual-machine> (raw)
In-Reply-To: <20210406102346.3890ceb2@alex-virtual-machine>

When we call get_user_pages() to pin user pages in memory, some of them
may be hwpoison pages. Currently we only handle the normal case, in which
the memory recovery job finished correctly and the hwpoison page is not
returned to callers. In other cases, e.g. when memory recovery fails and
the pte of the user process is not correctly set invalid, we still return
the hwpoison page; callers may then touch it, which leads to a panic.

In gup.c, for a normal page, follow_page_mask() returns the related page
pointer; in the other hwpoison case, where the pte is invalid, it returns
NULL, and NULL is handled in the if (!page) branch. This patch filters
out hwpoison pages in follow_page_mask() and returns an error code for
the recovery failure cases.

We check the page hwpoison status as early as possible, so that we skip
the normal procedure that would otherwise follow and try not to grab the
related pages.

Changes since v6:
- Fix wrong page pointer check in follow_trans_huge_pmd();

Signed-off-by: Aili Yao <yaoaili@kingsoft.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: stable@vger.kernel.org
---
 mm/gup.c         | 27 +++++++++++++++++++++++----
 mm/huge_memory.c | 11 ++++++++---
 mm/hugetlb.c     |  8 +++++++-
 mm/internal.h    | 13 +++++++++++++
 4 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index e40579624f10..88a93b89c03e 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -433,6 +433,9 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 			page = ERR_PTR(ret);
 			goto out;
 		}
+	} else if (PageHWPoison(page)) {
+		page = ERR_PTR(-EHWPOISON);
+		goto out;
 	}
 
 	if (flags & FOLL_SPLIT && PageTransCompound(page)) {
@@ -540,8 +543,13 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
 		page = follow_huge_pd(vma, address,
 				      __hugepd(pmd_val(pmdval)), flags,
 				      PMD_SHIFT);
-		if (page)
-			return page;
+		if (page) {
+			struct page *p = check_page_hwpoison(page);
+
+			if (p == ERR_PTR(-EHWPOISON) && flags & FOLL_GET)
+				put_page(page);
+			return p;
+		}
 		return no_page_table(vma, flags);
 	}
 retry:
@@ -643,7 +651,7 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
 	if (pud_huge(*pud) && is_vm_hugetlb_page(vma)) {
 		page = follow_huge_pud(mm, address, pud, flags);
 		if (page)
-			return page;
+			return check_page_hwpoison(page);
 		return no_page_table(vma, flags);
 	}
 	if (is_hugepd(__hugepd(pud_val(*pud)))) {
@@ -652,6 +660,13 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
 				      PUD_SHIFT);
 		if (page)
 			return page;
+		if (page) {
+			struct page *p = check_page_hwpoison(page);
+
+			if (p == ERR_PTR(-EHWPOISON) && flags & FOLL_GET)
+				put_page(page);
+			return p;
+		}
 		return no_page_table(vma, flags);
 	}
 	if (pud_devmap(*pud)) {
@@ -1087,10 +1102,14 @@ static long __get_user_pages(struct mm_struct *mm,
 			 * struct page.
 			 */
 			goto next_page;
-		} else if (IS_ERR(page)) {
+		} else if (PTR_ERR(page) == -EHWPOISON) {
+			ret = (foll_flags & FOLL_HWPOISON) ? -EHWPOISON : -EFAULT;
+			goto out;
+		}  else if (IS_ERR(page)) {
 			ret = PTR_ERR(page);
 			goto out;
 		}
+
 		if (pages) {
 			pages[i] = page;
 			flush_anon_page(vma, page, start);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ae907a9c2050..56ff2e83b67c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1349,6 +1349,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct page *page = NULL;
+	struct page *tail = NULL;
 
 	assert_spin_locked(pmd_lockptr(mm, pmd));
 
@@ -1366,6 +1367,11 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	page = pmd_page(*pmd);
 	VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
 
+	tail = page + ((addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT);
+
+	if (PageHWPoison(tail))
+		return ERR_PTR(-EHWPOISON);
+
 	if (!try_grab_page(page, flags))
 		return ERR_PTR(-ENOMEM);
 
@@ -1405,11 +1411,10 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 		unlock_page(page);
 	}
 skip_mlock:
-	page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
-	VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
+	VM_BUG_ON_PAGE(!PageCompound(tail) && !is_zone_device_page(tail), tail);
 
 out:
-	return page;
+	return tail;
 }
 
 /* NUMA hinting page fault entry point for trans huge pmds */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a86a58ef132d..8b50f7eaa159 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4958,7 +4958,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 					     likely(pages) ? pages + i : NULL,
 					     vmas ? vmas + i : NULL);
 
-		if (pages) {
+		/* As we will filter out the hwpoison page, so don't try grab it */
+		if (pages && !PageHWPoison(page)) {
 			/*
 			 * try_grab_compound_head() should always succeed here,
 			 * because: a) we hold the ptl lock, and b) we've just
@@ -5581,6 +5582,11 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 	pte = huge_ptep_get((pte_t *)pmd);
 	if (pte_present(pte)) {
 		page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
+		/* if hwpoison, we don't grab it */
+		if (PageHWPoison(compound_head(page))) {
+			page = ERR_PTR(-EHWPOISON);
+			goto out;
+		}
 		/*
 		 * try_grab_page() should always succeed here, because: a) we
 		 * hold the pmd (ptl) lock, and b) we've just checked that the
diff --git a/mm/internal.h b/mm/internal.h
index 1432feec62df..049b310bc79a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -97,6 +97,19 @@ static inline void set_page_refcounted(struct page *page)
 	set_page_count(page, 1);
 }
 
+/*
+ * Check the hwpoison status of any page type, and if TRUE, return ERR ptr.
+ */
+static inline struct page *check_page_hwpoison(struct page *page)
+{
+	if (PageHWPoison(page))
+		return ERR_PTR(-EHWPOISON);
+	else if (PageHuge(page) && PageHWPoison(compound_head(page)))
+		return ERR_PTR(-EHWPOISON);
+
+	return page;
+}
+
 extern unsigned long highest_memmap_pfn;
 
 /*
-- 
2.29.3



  reply	other threads:[~2021-04-06  2:41 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-17  8:37 [PATCH] mm/gup: check page posion status for coredump Aili Yao
2021-03-17  9:12 ` David Hildenbrand
2021-03-18  3:15   ` Aili Yao
2021-03-18  3:18   ` [PATCH v2] " Aili Yao
2021-03-18  4:46   ` [PATCH] " Matthew Wilcox
2021-03-18  5:34     ` Aili Yao
2021-03-19  2:44       ` [PATCH v3] " Aili Yao
2021-03-20  0:35         ` Matthew Wilcox
2021-03-22  3:40           ` Aili Yao
2021-03-22 11:33           ` [PATCH v5] mm/gup: check page hwposion " Aili Yao
2021-03-26 14:09             ` David Hildenbrand
2021-03-26 14:22               ` David Hildenbrand
2021-03-31  1:52                 ` HORIGUCHI NAOYA(堀口 直也)
2021-03-31  2:43                   ` Aili Yao
2021-03-31  4:32                     ` HORIGUCHI NAOYA(堀口 直也)
2021-03-31  6:44                       ` David Hildenbrand
2021-03-31  7:07                         ` Aili Yao
2021-04-01  2:31                         ` Aili Yao
2021-04-06  2:23                         ` [PATCH v6] mm/gup: check page hwpoison status for memory recovery failures Aili Yao
2021-04-06  2:41                           ` Aili Yao [this message]
2021-04-07  1:54                             ` [PATCH v7] " HORIGUCHI NAOYA(堀口 直也)
2021-04-07  7:48                               ` Aili Yao
2021-05-10  3:13                             ` Aili Yao
2021-03-31  6:07                   ` [PATCH v5] mm/gup: check page hwposion status for coredump Matthew Wilcox
2021-03-31  6:53                     ` HORIGUCHI NAOYA(堀口 直也)
2021-03-31  7:05                       ` David Hildenbrand
2021-03-18  8:14     ` [PATCH] mm/gup: check page posion " David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210406104123.451ee3c3@alex-virtual-machine \
    --to=yaoaili@kingsoft.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mike.kravetz@oracle.com \
    --cc=naoya.horiguchi@nec.com \
    --cc=osalvador@suse.de \
    --cc=sunhao2@kingsoft.com \
    --cc=willy@infradead.org \
    --cc=yangfeng1@kingsoft.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).