All of lore.kernel.org
 help / color / mirror / Atom feed
* + mmhwpoison-refactor-get_any_page.patch added to -mm tree
@ 2020-12-05  1:21 akpm
  0 siblings, 0 replies; 4+ messages in thread
From: akpm @ 2020-12-05  1:21 UTC (permalink / raw)
  To: mm-commits, naoya.horiguchi, osalvador, qcai, Vbabka


The patch titled
     Subject: mm,hwpoison: refactor get_any_page
has been added to the -mm tree.  Its filename is
     mmhwpoison-refactor-get_any_page.patch

This patch should soon appear at
    https://ozlabs.org/~akpm/mmots/broken-out/mmhwpoison-refactor-get_any_page.patch
and later at
    https://ozlabs.org/~akpm/mmotm/broken-out/mmhwpoison-refactor-get_any_page.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Oscar Salvador <osalvador@suse.de>
Subject: mm,hwpoison: refactor get_any_page

Patch series "HWPoison: Refactor get page interface", v2.


This patch (of 3):

When we want to grab a refcount via get_any_page, we call __get_any_page
that calls get_hwpoison_page to get the actual refcount.

get_any_page() is only there because we have a sort of retry mechanism in
case the page we met is unknown to us or if we raced with an allocation.

Also __get_any_page() prints some messages about the page type in case the
page was a free page or the page type was unknown, but if anything, we
only need to print a message in case the pagetype was unknown, as that is
reporting an error down the chain.

Let us merge get_any_page() and __get_any_page(), and let the message be
printed in soft_offline_page.  While we are it, we can also remove the
'pfn' parameter as it is no longer used.

Link: https://lkml.kernel.org/r/20201204102558.31607-1-osalvador@suse.de
Link: https://lkml.kernel.org/r/20201204102558.31607-2-osalvador@suse.de
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Acked-by: Vlastimil Babka <Vbabka@suse.cz>
Cc: Qian Cai <qcai@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 mm/memory-failure.c |   99 +++++++++++++++++-------------------------
 1 file changed, 42 insertions(+), 57 deletions(-)

--- a/mm/memory-failure.c~mmhwpoison-refactor-get_any_page
+++ a/mm/memory-failure.c
@@ -1707,70 +1707,51 @@ EXPORT_SYMBOL(unpoison_memory);
 
 /*
  * Safely get reference count of an arbitrary page.
- * Returns 0 for a free page, -EIO for a zero refcount page
- * that is not free, and 1 for any other page type.
- * For 1 the page is returned with increased page count, otherwise not.
+ * Returns 0 for a free page, 1 for an in-use page, -EIO for a page-type we
+ * cannot handle and -EBUSY if we raced with an allocation.
+ * We only incremented refcount in case the page was already in-use and it is
+ * a known type we can handle.
  */
-static int __get_any_page(struct page *p, unsigned long pfn, int flags)
+static int get_any_page(struct page *p, int flags)
 {
-	int ret;
+	int ret = 0, pass = 0;
+	bool count_increased = false;
 
 	if (flags & MF_COUNT_INCREASED)
-		return 1;
+		count_increased = true;
 
-	/*
-	 * When the target page is a free hugepage, just remove it
-	 * from free hugepage list.
-	 */
-	if (!get_hwpoison_page(p)) {
-		if (PageHuge(p)) {
-			pr_info("%s: %#lx free huge page\n", __func__, pfn);
-			ret = 0;
-		} else if (is_free_buddy_page(p)) {
-			pr_info("%s: %#lx free buddy page\n", __func__, pfn);
-			ret = 0;
-		} else if (page_count(p)) {
-			/* raced with allocation */
+try_again:
+	if (!count_increased && !get_hwpoison_page(p)) {
+		if (page_count(p)) {
+			/* We raced with an allocation, retry. */
+			if (pass++ < 3)
+				goto try_again;
 			ret = -EBUSY;
-		} else {
-			pr_info("%s: %#lx: unknown zero refcount page type %lx\n",
-				__func__, pfn, p->flags);
+		} else if (!PageHuge(p) && !is_free_buddy_page(p)) {
+			/* We raced with put_page, retry. */
+			if (pass++ < 3)
+				goto try_again;
 			ret = -EIO;
 		}
 	} else {
-		/* Not a free page */
-		ret = 1;
-	}
-	return ret;
-}
-
-static int get_any_page(struct page *page, unsigned long pfn, int flags)
-{
-	int ret = __get_any_page(page, pfn, flags);
-
-	if (ret == -EBUSY)
-		ret = __get_any_page(page, pfn, flags);
-
-	if (ret == 1 && !PageHuge(page) &&
-	    !PageLRU(page) && !__PageMovable(page)) {
-		/*
-		 * Try to free it.
-		 */
-		put_page(page);
-		shake_page(page, 1);
-
-		/*
-		 * Did it turn free?
-		 */
-		ret = __get_any_page(page, pfn, 0);
-		if (ret == 1 && !PageLRU(page)) {
-			/* Drop page reference which is from __get_any_page() */
-			put_page(page);
-			pr_info("soft_offline: %#lx: unknown non LRU page type %lx (%pGp)\n",
-				pfn, page->flags, &page->flags);
-			return -EIO;
+		if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
+			ret = 1;
+		} else {
+			/*
+			 * A page we cannot handle. Check whether we can turn
+			 * it into something we can handle.
+			 */
+			if (pass++ < 3) {
+				put_page(p);
+				shake_page(p, 1);
+				count_increased = false;
+				goto try_again;
+			}
+			put_page(p);
+			ret = -EIO;
 		}
 	}
+
 	return ret;
 }
 
@@ -1939,7 +1920,7 @@ int soft_offline_page(unsigned long pfn,
 		return -EIO;
 
 	if (PageHWPoison(page)) {
-		pr_info("soft offline: %#lx page already poisoned\n", pfn);
+		pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
 		if (flags & MF_COUNT_INCREASED)
 			put_page(page);
 		return 0;
@@ -1947,16 +1928,20 @@ int soft_offline_page(unsigned long pfn,
 
 retry:
 	get_online_mems();
-	ret = get_any_page(page, pfn, flags);
+	ret = get_any_page(page, flags);
 	put_online_mems();
 
-	if (ret > 0)
+	if (ret > 0) {
 		ret = soft_offline_in_use_page(page);
-	else if (ret == 0)
+	} else if (ret == 0) {
 		if (soft_offline_free_page(page) && try_again) {
 			try_again = false;
 			goto retry;
 		}
+	} else if (ret == -EIO) {
+		pr_info("%s: %#lx: unknown page type: %lx (%pGP)\n",
+			 __func__, pfn, page->flags, &page->flags);
+	}
 
 	return ret;
 }
_

Patches currently in -mm which might be from osalvador@suse.de are

mmhwpoison-drain-pcplists-before-bailing-out-for-non-buddy-zero-refcount-page.patch
mmhwpoison-take-free-pages-off-the-buddy-freelists.patch
mmhwpoison-take-free-pages-off-the-buddy-freelists-for-hugetlb.patch
mmhwpoison-drop-unneeded-pcplist-draining.patch
mmhwpoison-refactor-get_any_page.patch
mmhwpoison-disable-pcplists-before-grabbing-a-refcount.patch
mmhwpoison-remove-drain_all_pages-from-shake_page.patch
mmhugetlb-remove-unneded-initialization.patch


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: + mmhwpoison-refactor-get_any_page.patch added to -mm tree
  2020-12-07  9:52 ` Oscar Salvador
@ 2020-12-08  2:06   ` Andrew Morton
  0 siblings, 0 replies; 4+ messages in thread
From: Andrew Morton @ 2020-12-08  2:06 UTC (permalink / raw)
  To: Oscar Salvador; +Cc: mm-commits, naoya.horiguchi

On Mon, 07 Dec 2020 10:52:29 +0100 Oscar Salvador <osalvador@suse.de> wrote:

> On 2020-11-21 00:58, akpm@linux-foundation.org wrote:
> > The -mm tree is included into linux-next and is updated
> > there every 3-4 working days
> > 
> > ------------------------------------------------------
> > From: Oscar Salvador <osalvador@suse.de>
> > Subject: mm,hwpoison: refactor get_any_page
> > 
> > Patch series "HWPoison: Refactor get page interface"
> 
> Hi Andrew,
> 
> I saw that you already pulled [1] (v2 of this patchset) into mmotm.
> I am not sure if you dropped this v1 from mmotm, but if not, would you 
> be so kind to do so?
> 

Yes, I silently dropped v1.  Believe it or not, I do try to constrain
the amount of email I spray at people ;)


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: + mmhwpoison-refactor-get_any_page.patch added to -mm tree
  2020-11-20 23:58 akpm
@ 2020-12-07  9:52 ` Oscar Salvador
  2020-12-08  2:06   ` Andrew Morton
  0 siblings, 1 reply; 4+ messages in thread
From: Oscar Salvador @ 2020-12-07  9:52 UTC (permalink / raw)
  To: akpm; +Cc: mm-commits, naoya.horiguchi

On 2020-11-21 00:58, akpm@linux-foundation.org wrote:
> The -mm tree is included into linux-next and is updated
> there every 3-4 working days
> 
> ------------------------------------------------------
> From: Oscar Salvador <osalvador@suse.de>
> Subject: mm,hwpoison: refactor get_any_page
> 
> Patch series "HWPoison: Refactor get page interface"

Hi Andrew,

I saw that you already pulled [1] (v2 of this patchset) into mmotm.
I am not sure if you dropped this v1 from mmotm, but if not, would you 
be so kind to do so?

[1] 
https://patchwork.kernel.org/project/linux-mm/cover/20201204102558.31607-1-osalvador@suse.de/

Thanks!

-- 
Oscar Salvador
SUSE L3

^ permalink raw reply	[flat|nested] 4+ messages in thread

* + mmhwpoison-refactor-get_any_page.patch added to -mm tree
@ 2020-11-20 23:58 akpm
  2020-12-07  9:52 ` Oscar Salvador
  0 siblings, 1 reply; 4+ messages in thread
From: akpm @ 2020-11-20 23:58 UTC (permalink / raw)
  To: mm-commits, naoya.horiguchi, osalvador


The patch titled
     Subject: mm,hwpoison: refactor get_any_page
has been added to the -mm tree.  Its filename is
     mmhwpoison-refactor-get_any_page.patch

This patch should soon appear at
    https://ozlabs.org/~akpm/mmots/broken-out/mmhwpoison-refactor-get_any_page.patch
and later at
    https://ozlabs.org/~akpm/mmotm/broken-out/mmhwpoison-refactor-get_any_page.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Oscar Salvador <osalvador@suse.de>
Subject: mm,hwpoison: refactor get_any_page

Patch series "HWPoison: Refactor get page interface"

This patchset does three things:

 1) Drops MF_COUNT_INCREASED trick
 2) Refactors get page interface
 3) Places a common entry for grabbin a page from both hard offline
    and soft offline guarded by zone_pcp_{disable/enable}, so we do not
    have to drain pcplists by ourself and retry again.

Note that the MF_COUNT_INCREASED trick was left because if get_hwpoison_page
races with put_page (e.g:)

CPU0                         CPU1
put_page (refcount decremented to 0)
 __put_single_page
  free_unref_page
   free_unref_page_prepare
    free_pcp_prepare
     free_pages_prepare                           soft_offline_page
     :page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP     get_any_page
                            			    get_hwpoison_page
   free_unref_page_commit
    free_one_page
     __free_one_page (place it in buddy)

get_hwpoison_page sees that page has a refcount of 0, but since it was not
placed in buddy yet we cannot really handle it.  We now have a sort of
maximum passes in get_any_page, so in case we race with either an
allocation or a put_page, we retry again.

After an off-list discussion with Naoya, he agreed to proceed.

[1] https://patchwork.kernel.org/project/linux-mm/list/?series=364009
[2] https://patchwork.kernel.org/project/linux-mm/list/?series=381903


This patch (of 7):

When we want to grab a refcount via get_any_page, we call __get_any_page
that calls get_hwpoison_page to get the actual refcount.

get_any_page is only there because we have a sort of retry mechanism in
case the page we met is unknown to us or if we raced with an allocation.

Also __get_any_page prints some messages about the page type in case the
page was a free page or the page type was unknown, but if anything, we
only need to print a message in case the pagetype was unknown, as that is
reporting an error down the chain.

Let us merge get_any_page and __get_any_page, and let the message be
printed in soft_offline_page.

Link: https://lkml.kernel.org/r/20201119105716.5962-1-osalvador@suse.de
Link: https://lkml.kernel.org/r/20201119105716.5962-2-osalvador@suse.de
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 mm/memory-failure.c |   91 +++++++++++++++++-------------------------
 1 file changed, 37 insertions(+), 54 deletions(-)

--- a/mm/memory-failure.c~mmhwpoison-refactor-get_any_page
+++ a/mm/memory-failure.c
@@ -1707,70 +1707,49 @@ EXPORT_SYMBOL(unpoison_memory);
 
 /*
  * Safely get reference count of an arbitrary page.
- * Returns 0 for a free page, -EIO for a zero refcount page
- * that is not free, and 1 for any other page type.
- * For 1 the page is returned with increased page count, otherwise not.
+ * Returns 0 for a free page, 1 for an in-use page, -EIO for a page-type we
+ * cannot handle and -EBUSY if we raced with an allocation.
+ * We only incremented refcount in case the page was already in-use and it is
+ * a known type we can handle.
  */
-static int __get_any_page(struct page *p, unsigned long pfn, int flags)
+static int get_any_page(struct page *p, unsigned long pfn, int flags)
 {
-	int ret;
+	int ret = 0, pass = 0;
 
 	if (flags & MF_COUNT_INCREASED)
 		return 1;
 
-	/*
-	 * When the target page is a free hugepage, just remove it
-	 * from free hugepage list.
-	 */
+try_again:
 	if (!get_hwpoison_page(p)) {
-		if (PageHuge(p)) {
-			pr_info("%s: %#lx free huge page\n", __func__, pfn);
-			ret = 0;
-		} else if (is_free_buddy_page(p)) {
-			pr_info("%s: %#lx free buddy page\n", __func__, pfn);
-			ret = 0;
-		} else if (page_count(p)) {
-			/* raced with allocation */
+		if (page_count(p)) {
+			/* We raced with an allocation, retry. */
+			if (pass++ < 3)
+				goto try_again;
 			ret = -EBUSY;
-		} else {
-			pr_info("%s: %#lx: unknown zero refcount page type %lx\n",
-				__func__, pfn, p->flags);
+		} else if (!PageHuge(p) && !is_free_buddy_page(p)) {
+			/* We raced with put_page, retry. */
+			if (pass++ < 3)
+				goto try_again;
 			ret = -EIO;
 		}
 	} else {
-		/* Not a free page */
-		ret = 1;
-	}
-	return ret;
-}
-
-static int get_any_page(struct page *page, unsigned long pfn, int flags)
-{
-	int ret = __get_any_page(page, pfn, flags);
-
-	if (ret == -EBUSY)
-		ret = __get_any_page(page, pfn, flags);
-
-	if (ret == 1 && !PageHuge(page) &&
-	    !PageLRU(page) && !__PageMovable(page)) {
-		/*
-		 * Try to free it.
-		 */
-		put_page(page);
-		shake_page(page, 1);
-
-		/*
-		 * Did it turn free?
-		 */
-		ret = __get_any_page(page, pfn, 0);
-		if (ret == 1 && !PageLRU(page)) {
-			/* Drop page reference which is from __get_any_page() */
-			put_page(page);
-			pr_info("soft_offline: %#lx: unknown non LRU page type %lx (%pGp)\n",
-				pfn, page->flags, &page->flags);
-			return -EIO;
+		if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
+			ret = 1;
+		} else {
+			/*
+			 * A page we cannot handle. Check whether we can turn
+			 * it into something we can handle.
+			 */
+			if (pass++ < 3) {
+				put_page(p);
+				shake_page(p, 1);
+				goto try_again;
+			}
+			put_page(p);
+			ret = -EIO;
 		}
 	}
+
 	return ret;
 }
 
@@ -1939,7 +1918,7 @@ int soft_offline_page(unsigned long pfn,
 		return -EIO;
 
 	if (PageHWPoison(page)) {
-		pr_info("soft offline: %#lx page already poisoned\n", pfn);
+		pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
 		if (flags & MF_COUNT_INCREASED)
 			put_page(page);
 		return 0;
@@ -1950,13 +1929,17 @@ retry:
 	ret = get_any_page(page, pfn, flags);
 	put_online_mems();
 
-	if (ret > 0)
+	if (ret > 0) {
 		ret = soft_offline_in_use_page(page);
-	else if (ret == 0)
+	} else if (ret == 0) {
 		if (soft_offline_free_page(page) && try_again) {
 			try_again = false;
 			goto retry;
 		}
+	} else if (ret == -EIO) {
+		pr_info("%s: %#lx: unknown page type: %lx (%pGP)\n",
+			 __func__, pfn, page->flags, &page->flags);
+	}
 
 	return ret;
 }
_

Patches currently in -mm which might be from osalvador@suse.de are

mmhwpoison-drain-pcplists-before-bailing-out-for-non-buddy-zero-refcount-page.patch
mmhwpoison-take-free-pages-off-the-buddy-freelists.patch
mmhwpoison-take-free-pages-off-the-buddy-freelists-for-hugetlb.patch
mmhwpoison-drop-unneeded-pcplist-draining.patch
mmhwpoison-refactor-get_any_page.patch
mmhwpoison-drop-pfn-parameter.patch
mmhwpoison-disable-pcplists-before-grabbing-a-refcount.patch
mmhwpoison-remove-drain_all_pages-from-shake_page.patch
mmhugetlb-remove-unneded-initialization.patch


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-12-08  2:07 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-05  1:21 + mmhwpoison-refactor-get_any_page.patch added to -mm tree akpm
  -- strict thread matches above, loose matches on Subject: below --
2020-11-20 23:58 akpm
2020-12-07  9:52 ` Oscar Salvador
2020-12-08  2:06   ` Andrew Morton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.