From: Dan Williams <dan.j.williams@intel.com>
To: linux-nvdimm@lists.01.org
Cc: hch@lst.de, linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
	jack@suse.cz
Subject: [PATCH v2 09/11] mm, memory_failure: Fix page->mapping assumptions relative to the page lock
Date: Sat, 02 Jun 2018 22:23:31 -0700
Message-ID: <152800341110.17112.2806198295112832622.stgit@dwillia2-desk3.amr.corp.intel.com>
In-Reply-To: <152800336321.17112.3300876636370683279.stgit@dwillia2-desk3.amr.corp.intel.com>

The current memory_failure() implementation assumes that lock_page() is
sufficient to stabilize page->mapping and that ->mapping->host will not
be freed. The dax implementation, on the other hand, relies on
xa_lock_irq() to stabilize the page->mapping relationship, and that
lock cannot be held across the routines in the memory_failure() path
that currently run under lock_page().

Teach the various memory_failure() helpers to pin the address_space and
revalidate page->mapping under xa_lock_irq(&mapping->i_pages).
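
For background, the revalidation pattern adopted below by kill_proc()
and collect_procs_file() reduces to the following minimal sketch (the
helper name is hypothetical and not part of this patch):

/*
 * Sketch only: page->mapping may only be trusted while holding the
 * i_pages lock. On success the lock is left held so the caller can
 * act on the still-valid association; on failure it is dropped.
 */
static bool revalidate_mapping(struct address_space *mapping,
		struct page *page)
{
	xa_lock_irq(&mapping->i_pages);
	if (page->mapping == mapping)
		return true;	/* caller drops the lock when done */
	xa_unlock_irq(&mapping->i_pages);
	return false;
}

The patch open-codes this check rather than adding a helper, since
each call site needs a different unwind path (kill_proc() returns
early, collect_procs_file() breaks out of its task loop).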

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 mm/memory-failure.c |   56 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 38 insertions(+), 18 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 42a193ee14d3..b6efb78ba49b 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -179,12 +179,20 @@ EXPORT_SYMBOL_GPL(hwpoison_filter);
  * ``action required'' if error happened in current execution context
  */
 static int kill_proc(struct task_struct *t, unsigned long addr,
-			unsigned long pfn, unsigned size_shift, int flags)
+		struct address_space *mapping, struct page *page,
+		unsigned size_shift, int flags)
 {
-	int ret;
+	int ret = 0;
+
+	/* revalidate the page before killing the process */
+	xa_lock_irq(&mapping->i_pages);
+	if (page->mapping != mapping) {
+		xa_unlock_irq(&mapping->i_pages);
+		return 0;
+	}
 
 	pr_err("Memory failure: %#lx: Killing %s:%d due to hardware memory corruption\n",
-		pfn, t->comm, t->pid);
+			page_to_pfn(page), t->comm, t->pid);
 
 	if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) {
 		ret = force_sig_mceerr(BUS_MCEERR_AR, (void __user *)addr,
@@ -199,6 +207,7 @@ static int kill_proc(struct task_struct *t, unsigned long addr,
 		ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)addr,
 				      size_shift, t);  /* synchronous? */
 	}
+	xa_unlock_irq(&mapping->i_pages);
 	if (ret < 0)
 		pr_info("Memory failure: Error sending signal to %s:%d: %d\n",
 			t->comm, t->pid, ret);
@@ -316,8 +325,8 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
  * wrong earlier.
  */
 static void kill_procs(struct list_head *to_kill, int forcekill,
-			  bool fail, unsigned size_shift, unsigned long pfn,
-			  int flags)
+		bool fail, unsigned size_shift, struct address_space *mapping,
+		struct page *page, int flags)
 {
 	struct to_kill *tk, *next;
 
@@ -330,7 +339,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill,
 			 */
 			if (fail || tk->addr_valid == 0) {
 				pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
-				       pfn, tk->tsk->comm, tk->tsk->pid);
+						page_to_pfn(page), tk->tsk->comm,
+						tk->tsk->pid);
 				force_sig(SIGKILL, tk->tsk);
 			}
 
@@ -341,9 +351,10 @@ static void kill_procs(struct list_head *to_kill, int forcekill,
 			 * process anyways.
 			 */
 			else if (kill_proc(tk->tsk, tk->addr,
-					      pfn, size_shift, flags) < 0)
+					      mapping, page, size_shift, flags) < 0)
 				pr_err("Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n",
-				       pfn, tk->tsk->comm, tk->tsk->pid);
+						page_to_pfn(page), tk->tsk->comm,
+						tk->tsk->pid);
 		}
 		put_task_struct(tk->tsk);
 		kfree(tk);
@@ -429,21 +440,27 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 /*
  * Collect processes when the error hit a file mapped page.
  */
-static void collect_procs_file(struct page *page, struct list_head *to_kill,
-			      struct to_kill **tkc, int force_early)
+static void collect_procs_file(struct address_space *mapping, struct page *page,
+		struct list_head *to_kill, struct to_kill **tkc,
+		int force_early)
 {
 	struct vm_area_struct *vma;
 	struct task_struct *tsk;
-	struct address_space *mapping = page->mapping;
 
 	i_mmap_lock_read(mapping);
 	read_lock(&tasklist_lock);
 	for_each_process(tsk) {
-		pgoff_t pgoff = page_to_pgoff(page);
+		pgoff_t pgoff;
 		struct task_struct *t = task_early_kill(tsk, force_early);
 
 		if (!t)
 			continue;
+		xa_lock_irq(&mapping->i_pages);
+		if (page->mapping != mapping) {
+			xa_unlock_irq(&mapping->i_pages);
+			break;
+		}
+		pgoff = page_to_pgoff(page);
 		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
 				      pgoff) {
 			/*
@@ -456,6 +473,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 			if (vma->vm_mm == t->mm)
 				add_to_kill(t, page, vma, to_kill, tkc);
 		}
+		xa_unlock_irq(&mapping->i_pages);
 	}
 	read_unlock(&tasklist_lock);
 	i_mmap_unlock_read(mapping);
@@ -467,12 +485,12 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
  * First preallocate one tokill structure outside the spin locks,
  * so that we can kill at least one process reasonably reliable.
  */
-static void collect_procs(struct page *page, struct list_head *tokill,
-				int force_early)
+static void collect_procs(struct address_space *mapping, struct page *page,
+		struct list_head *tokill, int force_early)
 {
 	struct to_kill *tk;
 
-	if (!page->mapping)
+	if (!mapping)
 		return;
 
 	tk = kmalloc(sizeof(struct to_kill), GFP_NOIO);
@@ -481,7 +499,7 @@ static void collect_procs(struct page *page, struct list_head *tokill,
 	if (PageAnon(page))
 		collect_procs_anon(page, tokill, &tk, force_early);
 	else
-		collect_procs_file(page, tokill, &tk, force_early);
+		collect_procs_file(mapping, page, tokill, &tk, force_early);
 	kfree(tk);
 }
 
@@ -986,7 +1004,8 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * there's nothing that can be done.
 	 */
 	if (kill)
-		collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
+		collect_procs(mapping, hpage, &tokill,
+				flags & MF_ACTION_REQUIRED);
 
 	unmap_success = try_to_unmap(hpage, ttu);
 	if (!unmap_success)
@@ -1012,7 +1031,8 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 */
 	forcekill = PageDirty(hpage) || (flags & MF_MUST_KILL);
 	size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
-	kill_procs(&tokill, forcekill, !unmap_success, size_shift, pfn, flags);
+	kill_procs(&tokill, forcekill, !unmap_success, size_shift, mapping,
+			hpage, flags);
 
 	return unmap_success;
 }
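
Note that revalidating under xa_lock_irq() is only sound because the
dax / page-cache side tears down the page->mapping association under
the same lock. As a rough illustration of that invariant (not from
this patch; at this point in history mapping->i_pages is still a
radix tree behind the xa_lock_irq() wrappers):

	/*
	 * truncate/invalidate side: disassociate the page under the
	 * same lock the memory_failure() helpers check against.
	 */
	xa_lock_irq(&mapping->i_pages);
	radix_tree_delete(&mapping->i_pages, page->index);
	page->mapping = NULL;
	xa_unlock_irq(&mapping->i_pages);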

Thread overview: 32+ messages
2018-06-03  5:22 [PATCH v2 00/11] mm: Teach memory_failure() about ZONE_DEVICE pages Dan Williams
2018-06-03  5:22 ` [PATCH v2 01/11] device-dax: Convert to vmf_insert_mixed and vm_fault_t Dan Williams
2018-06-03  5:22 ` [PATCH v2 02/11] device-dax: Cleanup vm_fault de-reference chains Dan Williams
2018-06-03  5:22 ` [PATCH v2 03/11] device-dax: Enable page_mapping() Dan Williams
2018-06-03  5:23 ` [PATCH v2 04/11] device-dax: Set page->index Dan Williams
2018-06-03  5:23 ` [PATCH v2 05/11] filesystem-dax: " Dan Williams
2018-06-03  5:23 ` [PATCH v2 06/11] mm, madvise_inject_error: Let memory_failure() optionally take a page reference Dan Williams
2018-06-03  5:23 ` [PATCH v2 07/11] x86, memory_failure: Introduce {set, clear}_mce_nospec() Dan Williams
2018-06-04 17:08   ` Luck, Tony
2018-06-04 17:39     ` Dan Williams
2018-06-04 18:08       ` Luck, Tony
2018-06-04 18:35         ` Dan Williams
2018-06-03  5:23 ` [PATCH v2 08/11] mm, memory_failure: Pass page size to kill_proc() Dan Williams
2018-06-03  5:23 ` Dan Williams [this message]
2018-06-03  5:23 ` [PATCH v2 10/11] mm, memory_failure: Teach memory_failure() about dev_pagemap pages Dan Williams
2018-06-03  5:23 ` [PATCH v2 11/11] libnvdimm, pmem: Restore page attributes when clearing errors Dan Williams
2018-06-04 12:40 ` [PATCH v2 00/11] mm: Teach memory_failure() about ZONE_DEVICE pages Michal Hocko
2018-06-04 14:31   ` Dan Williams
2018-06-05 14:11     ` Michal Hocko
2018-06-05 14:33       ` Dan Williams
2018-06-06  7:39         ` Michal Hocko
2018-06-06 13:44           ` Dan Williams
2018-06-07 14:37             ` Michal Hocko
2018-06-07 16:52               ` Dan Williams
2018-06-11  7:50                 ` Michal Hocko
2018-06-11 14:44                   ` Dan Williams
2018-06-11 14:56                     ` Michal Hocko
2018-06-11 15:19                       ` Dan Williams
2018-06-11 17:35                         ` Andi Kleen
2018-06-12  1:50                         ` Naoya Horiguchi
2018-06-12  1:58                           ` Dan Williams
2018-06-12  4:04                           ` Jane Chu
