* [to-be-updated] mmhwpoison-make-get_hwpoison_page-call-get_any_page.patch removed from -mm tree
@ 2021-05-25 23:56 akpm
0 siblings, 0 replies; 2+ messages in thread
From: akpm @ 2021-05-25 23:56 UTC (permalink / raw)
To: mhocko, mike.kravetz, mm-commits, naoya.horiguchi, songmuchun, tony.luck
The patch titled
Subject: mm,hwpoison: make get_hwpoison_page call get_any_page()
has been removed from the -mm tree. Its filename was
mmhwpoison-make-get_hwpoison_page-call-get_any_page.patch
This patch was dropped because an updated version will be merged
------------------------------------------------------
From: Naoya Horiguchi <naoya.horiguchi@nec.com>
Subject: mm,hwpoison: make get_hwpoison_page call get_any_page()
__get_hwpoison_page() could fail to grab refcount by some race condition,
so it's helpful if we can handle it by retrying. We already have retry
logic, so make get_hwpoison_page() call get_any_page() when called from
memory_failure().
Link: https://lkml.kernel.org/r/20210518231259.2553203-3-nao.horiguchi@gmail.com
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/hugetlb.c | 2
mm/memory-failure.c | 110 ++++++++++++++++++++++--------------------
2 files changed, 62 insertions(+), 50 deletions(-)
--- a/mm/hugetlb.c~mmhwpoison-make-get_hwpoison_page-call-get_any_page
+++ a/mm/hugetlb.c
@@ -5857,6 +5857,8 @@ int get_hwpoison_huge_page(struct page *
*hugetlb = true;
if (HPageFreed(page) || HPageMigratable(page))
ret = get_page_unless_zero(page);
+ else
+ ret = -EBUSY;
}
spin_unlock_irq(&hugetlb_lock);
return ret;
--- a/mm/memory-failure.c~mmhwpoison-make-get_hwpoison_page-call-get_any_page
+++ a/mm/memory-failure.c
@@ -949,13 +949,6 @@ static int page_action(struct page_state
return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
}
-/**
- * __get_hwpoison_page() - Get refcount for memory error handling:
- * @page: raw error page (hit by memory error)
- *
- * Return: return 0 if failed to grab the refcount, otherwise true (some
- * non-zero value.)
- */
static int __get_hwpoison_page(struct page *page)
{
struct page *head = compound_head(page);
@@ -992,15 +985,6 @@ static int __get_hwpoison_page(struct pa
return 0;
}
-/*
- * Safely get reference count of an arbitrary page.
- *
- * Returns 0 for a free page, 1 for an in-use page,
- * -EIO for a page-type we cannot handle and -EBUSY if we raced with an
- * allocation.
- * We only incremented refcount in case the page was already in-use and it
- * is a known type we can handle.
- */
static int get_any_page(struct page *p, unsigned long flags)
{
int ret = 0, pass = 0;
@@ -1010,50 +994,76 @@ static int get_any_page(struct page *p,
count_increased = true;
try_again:
- if (!count_increased && !__get_hwpoison_page(p)) {
- if (page_count(p)) {
- /* We raced with an allocation, retry. */
- if (pass++ < 3)
- goto try_again;
- ret = -EBUSY;
- } else if (!PageHuge(p) && !is_free_buddy_page(p)) {
- /* We raced with put_page, retry. */
+ if (!count_increased) {
+ ret = __get_hwpoison_page(p);
+ if (!ret) {
+ if (page_count(p)) {
+ /* We raced with an allocation, retry. */
+ if (pass++ < 3)
+ goto try_again;
+ ret = -EBUSY;
+ } else if (!PageHuge(p) && !is_free_buddy_page(p)) {
+ /* We raced with put_page, retry. */
+ if (pass++ < 3)
+ goto try_again;
+ ret = -EIO;
+ }
+ goto out;
+ } else if (ret == -EBUSY) {
+ /* We raced with freeing huge page to buddy, retry. */
if (pass++ < 3)
goto try_again;
- ret = -EIO;
}
+ }
+
+ if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
+ ret = 1;
} else {
- if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
- ret = 1;
- } else {
- /*
- * A page we cannot handle. Check whether we can turn
- * it into something we can handle.
- */
- if (pass++ < 3) {
- put_page(p);
- shake_page(p, 1);
- count_increased = false;
- goto try_again;
- }
+ /*
+ * A page we cannot handle. Check whether we can turn
+ * it into something we can handle.
+ */
+ if (pass++ < 3) {
put_page(p);
- ret = -EIO;
+ shake_page(p, 1);
+ count_increased = false;
+ goto try_again;
}
+ put_page(p);
+ ret = -EIO;
}
-
+out:
return ret;
}
-static int get_hwpoison_page(struct page *p, unsigned long flags,
- enum mf_flags ctxt)
+/**
+ * get_hwpoison_page() - Get refcount for memory error handling
+ * @p: Raw error page (hit by memory error)
+ * @flags: Flags controlling behavior of error handling
+ *
+ * get_hwpoison_page() takes a page refcount of an error page to handle memory
+ * error on it, after checking that the error page is in a well-defined state
+ * (defined as a page-type we can successfully handle the memor error on it,
+ * such as LRU page and hugetlb page).
+ *
+ * Memory error handling could be triggered at any time on any type of page,
+ * so it's prone to race with typical memory management lifecycle (like
+ * allocation and free). So to avoid such races, get_hwpoison_page() takes
+ * extra care for the error page's state (as done in __get_hwpoison_page()),
+ * and has some retry logic in get_any_page().
+ *
+ * Return: 0 on failure,
+ * 1 on success for in-use pages in a well-defined state,
+ * -EIO for pages on which we can not handle memory errors,
+ * -EBUSY when get_hwpoison_page() has raced with page lifecycle
+ * operations like allocation and free.
+ */
+static int get_hwpoison_page(struct page *p, unsigned long flags)
{
int ret;
zone_pcp_disable(page_zone(p));
- if (ctxt == MF_SOFT_OFFLINE)
- ret = get_any_page(p, flags);
- else
- ret = __get_hwpoison_page(p);
+ ret = get_any_page(p, flags);
zone_pcp_enable(page_zone(p));
return ret;
@@ -1239,7 +1249,7 @@ static int memory_failure_hugetlb(unsign
num_poisoned_pages_inc();
- if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
+ if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags)) {
/*
* Check "filter hit" and "race with other subpage."
*/
@@ -1453,7 +1463,7 @@ try_again:
* In fact it's dangerous to directly bump up page count from 0,
* that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
*/
- if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
+ if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags)) {
if (is_free_buddy_page(p)) {
if (take_page_off_buddy(p)) {
page_ref_inc(p);
@@ -1741,7 +1751,7 @@ int unpoison_memory(unsigned long pfn)
return 0;
}
- if (!get_hwpoison_page(p, flags, 0)) {
+ if (!get_hwpoison_page(p, flags)) {
if (TestClearPageHWPoison(p))
num_poisoned_pages_dec();
unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
@@ -1957,7 +1967,7 @@ int soft_offline_page(unsigned long pfn,
retry:
get_online_mems();
- ret = get_hwpoison_page(page, flags, MF_SOFT_OFFLINE);
+ ret = get_hwpoison_page(page, flags);
put_online_mems();
if (ret > 0) {
_
Patches currently in -mm which might be from naoya.horiguchi@nec.com are
mmhwpoison-send-sigbus-with-error-virutal-address.patch
^ permalink raw reply [flat|nested] 2+ messages in thread
* [to-be-updated] mmhwpoison-make-get_hwpoison_page-call-get_any_page.patch removed from -mm tree
@ 2021-05-13 22:36 akpm
0 siblings, 0 replies; 2+ messages in thread
From: akpm @ 2021-05-13 22:36 UTC (permalink / raw)
To: mhocko, mike.kravetz, mm-commits, naoya.horiguchi, osalvador,
songmuchun, tony.luck
The patch titled
Subject: mm,hwpoison: make get_hwpoison_page call get_any_page()
has been removed from the -mm tree. Its filename was
mmhwpoison-make-get_hwpoison_page-call-get_any_page.patch
This patch was dropped because an updated version will be merged
------------------------------------------------------
From: Naoya Horiguchi <naoya.horiguchi@nec.com>
Subject: mm,hwpoison: make get_hwpoison_page call get_any_page()
Now __get_hwpoison_page() could return -EBUSY in a race condition, so it's
helpful to handle it by retrying. We already have retry logic, so make
get_hwpoison_page() call get_any_page() when called from memory_failure().
Link: https://lkml.kernel.org/r/20210511151016.2310627-3-nao.horiguchi@gmail.com
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/memory-failure.c | 114 ++++++++++++++++++++++--------------------
1 file changed, 62 insertions(+), 52 deletions(-)
--- a/mm/memory-failure.c~mmhwpoison-make-get_hwpoison_page-call-get_any_page
+++ a/mm/memory-failure.c
@@ -949,13 +949,6 @@ static int page_action(struct page_state
return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
}
-/**
- * __get_hwpoison_page() - Get refcount for memory error handling:
- * @page: raw error page (hit by memory error)
- *
- * Return: return 0 if failed to grab the refcount, otherwise true (some
- * non-zero value.)
- */
static int __get_hwpoison_page(struct page *page)
{
struct page *head = compound_head(page);
@@ -999,15 +992,6 @@ static int __get_hwpoison_page(struct pa
return get_page_unless_zero(page);
}
-/*
- * Safely get reference count of an arbitrary page.
- *
- * Returns 0 for a free page, 1 for an in-use page,
- * -EIO for a page-type we cannot handle and -EBUSY if we raced with an
- * allocation.
- * We only incremented refcount in case the page was already in-use and it
- * is a known type we can handle.
- */
static int get_any_page(struct page *p, unsigned long flags)
{
int ret = 0, pass = 0;
@@ -1017,50 +1001,76 @@ static int get_any_page(struct page *p,
count_increased = true;
try_again:
- if (!count_increased && !__get_hwpoison_page(p)) {
- if (page_count(p)) {
- /* We raced with an allocation, retry. */
- if (pass++ < 3)
- goto try_again;
- ret = -EBUSY;
- } else if (!PageHuge(p) && !is_free_buddy_page(p)) {
- /* We raced with put_page, retry. */
- if (pass++ < 3)
- goto try_again;
- ret = -EIO;
+ if (!count_increased) {
+ ret = __get_hwpoison_page(p);
+ if (!ret) {
+ if (page_count(p)) {
+ /* We raced with an allocation, retry. */
+ if (pass++ < 3)
+ goto try_again;
+ ret = -EBUSY;
+ } else if (!PageHuge(p) && !is_free_buddy_page(p)) {
+ /* We raced with put_page, retry. */
+ if (pass++ < 3)
+ goto try_again;
+ ret = -EIO;
+ }
+ goto out;
}
+ }
+
+ if (ret == -EBUSY) {
+ /* We raced with freeing huge page to buddy, retry. */
+ if (pass++ < 3)
+ goto try_again;
+ } else if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
+ ret = 1;
} else {
- if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
- ret = 1;
- } else {
- /*
- * A page we cannot handle. Check whether we can turn
- * it into something we can handle.
- */
- if (pass++ < 3) {
- put_page(p);
- shake_page(p, 1);
- count_increased = false;
- goto try_again;
- }
+ /*
+ * A page we cannot handle. Check whether we can turn
+ * it into something we can handle.
+ */
+ if (pass++ < 3) {
put_page(p);
- ret = -EIO;
+ shake_page(p, 1);
+ count_increased = false;
+ goto try_again;
}
+ put_page(p);
+ ret = -EIO;
}
-
+out:
return ret;
}
-static int get_hwpoison_page(struct page *p, unsigned long flags,
- enum mf_flags ctxt)
+/**
+ * get_hwpoison_page() - Get refcount for memory error handling
+ * @p: Raw error page (hit by memory error)
+ * @flags: Flags controlling behavior of error handling
+ *
+ * get_hwpoison_page() takes a page refcount of an error page to handle memory
+ * error on it, after checking that the error page is in a well-defined state
+ * (defined as a page-type we can successfully handle the memor error on it,
+ * such as LRU page and hugetlb page).
+ *
+ * Memory error handling could be triggered at any time on any type of page,
+ * so it's prone to race with typical memory management lifecycle (like
+ * allocation and free). So to avoid such races, get_hwpoison_page() takes
+ * extra care for the error page's state (as done in __get_hwpoison_page()),
+ * and has some retry logic in get_any_page().
+ *
+ * Return: 0 for buddy free pages,
+ * 1 on success for in-use pages in a well-defined state,
+ * -EIO for pages on which we can not handle memory errors,
+ * -EBUSY when get_hwpoison_page() has raced with page lifecycle
+ * operations like allocation and free.
+ */
+static int get_hwpoison_page(struct page *p, unsigned long flags)
{
int ret;
zone_pcp_disable(page_zone(p));
- if (ctxt == MF_SOFT_OFFLINE)
- ret = get_any_page(p, flags);
- else
- ret = __get_hwpoison_page(p);
+ ret = get_any_page(p, flags);
zone_pcp_enable(page_zone(p));
return ret;
@@ -1246,7 +1256,7 @@ static int memory_failure_hugetlb(unsign
num_poisoned_pages_inc();
- if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
+ if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags)) {
/*
* Check "filter hit" and "race with other subpage."
*/
@@ -1460,7 +1470,7 @@ try_again:
* In fact it's dangerous to directly bump up page count from 0,
* that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
*/
- if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
+ if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags)) {
if (is_free_buddy_page(p)) {
if (take_page_off_buddy(p)) {
page_ref_inc(p);
@@ -1748,7 +1758,7 @@ int unpoison_memory(unsigned long pfn)
return 0;
}
- if (!get_hwpoison_page(p, flags, 0)) {
+ if (!get_hwpoison_page(p, flags)) {
if (TestClearPageHWPoison(p))
num_poisoned_pages_dec();
unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
@@ -1964,7 +1974,7 @@ int soft_offline_page(unsigned long pfn,
retry:
get_online_mems();
- ret = get_hwpoison_page(page, flags, MF_SOFT_OFFLINE);
+ ret = get_hwpoison_page(page, flags);
put_online_mems();
if (ret > 0) {
_
Patches currently in -mm which might be from naoya.horiguchi@nec.com are
mmhwpoison-fix-race-with-compound-page-allocation.patch
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2021-05-25 23:56 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-25 23:56 [to-be-updated] mmhwpoison-make-get_hwpoison_page-call-get_any_page.patch removed from -mm tree akpm
-- strict thread matches above, loose matches on Subject: below --
2021-05-13 22:36 akpm
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.