linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Omar Sandoval <osandov@osandov.com>
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	Andrew Morton <akpm@linux-foundation.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>,
	Eric Biederman <ebiederm@xmission.com>,
	James Morse <james.morse@arm.com>,
	Bhupesh Sharma <bhsharma@redhat.com>,
	kernel-team@fb.com
Subject: [PATCH v3 4/8] proc/kcore: fix memory hotplug vs multiple opens race
Date: Wed, 18 Jul 2018 15:58:44 -0700	[thread overview]
Message-ID: <6106c509998779730c12400c1b996425df7d7089.1531953780.git.osandov@fb.com> (raw)
In-Reply-To: <cover.1531953780.git.osandov@fb.com>

From: Omar Sandoval <osandov@fb.com>

There's a theoretical race condition that will cause /proc/kcore to miss
a memory hotplug event:

CPU0                              CPU1
// hotplug event 1
kcore_need_update = 1

open_kcore()                      open_kcore()
    kcore_update_ram()                kcore_update_ram()
        // Walk RAM                       // Walk RAM
        __kcore_update_ram()              __kcore_update_ram()
            kcore_need_update = 0

// hotplug event 2
kcore_need_update = 1
                                              kcore_need_update = 0

Note that CPU1 set up the RAM kcore entries with the state after hotplug
event 1 but cleared the flag for hotplug event 2. The RAM entries will
therefore be stale until there is another hotplug event.

This is an extremely unlikely sequence of events, but the fix makes the
synchronization saner, anyways: we serialize the entire update sequence,
which means that whoever clears the flag will always succeed in
replacing the kcore list.

Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 fs/proc/kcore.c | 93 +++++++++++++++++++++++--------------------------
 1 file changed, 44 insertions(+), 49 deletions(-)

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index ae43a97d511d..95aa988c5b5d 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -98,53 +98,15 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
 	return size + *elf_buflen;
 }
 
-static void free_kclist_ents(struct list_head *head)
-{
-	struct kcore_list *tmp, *pos;
-
-	list_for_each_entry_safe(pos, tmp, head, list) {
-		list_del(&pos->list);
-		kfree(pos);
-	}
-}
-/*
- * Replace all KCORE_RAM/KCORE_VMEMMAP information with passed list.
- */
-static void __kcore_update_ram(struct list_head *list)
-{
-	int nphdr;
-	size_t size;
-	struct kcore_list *tmp, *pos;
-	LIST_HEAD(garbage);
-
-	down_write(&kclist_lock);
-	if (xchg(&kcore_need_update, 0)) {
-		list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
-			if (pos->type == KCORE_RAM
-				|| pos->type == KCORE_VMEMMAP)
-				list_move(&pos->list, &garbage);
-		}
-		list_splice_tail(list, &kclist_head);
-	} else
-		list_splice(list, &garbage);
-	proc_root_kcore->size = get_kcore_size(&nphdr, &size);
-	up_write(&kclist_lock);
-
-	free_kclist_ents(&garbage);
-}
-
-
 #ifdef CONFIG_HIGHMEM
 /*
  * If no highmem, we can assume [0...max_low_pfn) continuous range of memory
  * because memory hole is not as big as !HIGHMEM case.
  * (HIGHMEM is special because part of memory is _invisible_ from the kernel.)
  */
-static int kcore_update_ram(void)
+static int kcore_ram_list(struct list_head *head)
 {
-	LIST_HEAD(head);
 	struct kcore_list *ent;
-	int ret = 0;
 
 	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
 	if (!ent)
@@ -152,9 +114,8 @@ static int kcore_update_ram(void)
 	ent->addr = (unsigned long)__va(0);
 	ent->size = max_low_pfn << PAGE_SHIFT;
 	ent->type = KCORE_RAM;
-	list_add(&ent->list, &head);
-	__kcore_update_ram(&head);
-	return ret;
+	list_add(&ent->list, head);
+	return 0;
 }
 
 #else /* !CONFIG_HIGHMEM */
@@ -253,11 +214,10 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
 	return 1;
 }
 
-static int kcore_update_ram(void)
+static int kcore_ram_list(struct list_head *list)
 {
 	int nid, ret;
 	unsigned long end_pfn;
-	LIST_HEAD(head);
 
 	/* Not inialized....update now */
 	/* find out "max pfn" */
@@ -269,15 +229,50 @@ static int kcore_update_ram(void)
 			end_pfn = node_end;
 	}
 	/* scan 0 to max_pfn */
-	ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
-	if (ret) {
-		free_kclist_ents(&head);
+	ret = walk_system_ram_range(0, end_pfn, list, kclist_add_private);
+	if (ret)
 		return -ENOMEM;
+	return 0;
+}
+#endif /* CONFIG_HIGHMEM */
+
+static int kcore_update_ram(void)
+{
+	LIST_HEAD(list);
+	LIST_HEAD(garbage);
+	int nphdr;
+	size_t size;
+	struct kcore_list *tmp, *pos;
+	int ret = 0;
+
+	down_write(&kclist_lock);
+	if (!xchg(&kcore_need_update, 0))
+		goto out;
+
+	ret = kcore_ram_list(&list);
+	if (ret) {
+		/* Couldn't get the RAM list, try again next time. */
+		WRITE_ONCE(kcore_need_update, 1);
+		list_splice_tail(&list, &garbage);
+		goto out;
+	}
+
+	list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
+		if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP)
+			list_move(&pos->list, &garbage);
+	}
+	list_splice_tail(&list, &kclist_head);
+
+	proc_root_kcore->size = get_kcore_size(&nphdr, &size);
+
+out:
+	up_write(&kclist_lock);
+	list_for_each_entry_safe(pos, tmp, &garbage, list) {
+		list_del(&pos->list);
+		kfree(pos);
 	}
-	__kcore_update_ram(&head);
 	return ret;
 }
-#endif /* CONFIG_HIGHMEM */
 
 /*****************************************************************************/
 /*
-- 
2.18.0

  parent reply	other threads:[~2018-07-18 23:39 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-18 22:58 [PATCH v3 0/8] /proc/kcore improvements Omar Sandoval
2018-07-18 22:58 ` [PATCH v3 1/8] proc/kcore: don't grab lock for kclist_add() Omar Sandoval
2018-07-18 22:58 ` [PATCH v3 2/8] proc/kcore: don't grab lock for memory hotplug notifier Omar Sandoval
2018-07-18 22:58 ` [PATCH v3 3/8] proc/kcore: replace kclist_lock rwlock with rwsem Omar Sandoval
2018-07-18 22:58 ` Omar Sandoval [this message]
2018-07-18 22:58 ` [PATCH v3 5/8] proc/kcore: hold lock during read Omar Sandoval
2018-07-24 15:11   ` Tetsuo Handa
2018-07-25 23:34     ` Omar Sandoval
2018-07-18 22:58 ` [PATCH v3 6/8] proc/kcore: clean up ELF header generation Omar Sandoval
2018-07-18 22:58 ` [PATCH v3 7/8] proc/kcore: optimize multiple page reads Omar Sandoval
2018-08-28 10:59   ` KASAN error in " Dominique Martinet
2018-08-29  4:04     ` [PATCH] proc/kcore: fix invalid memory access in multi-page read optimization Dominique Martinet
2018-09-04 18:03       ` Omar Sandoval
2018-09-04 22:24         ` Dominique Martinet
2018-09-04 22:35       ` [PATCH v2] " Dominique Martinet
2018-09-04 22:38         ` [PATCH v3] " Dominique Martinet
2018-09-04 22:41           ` Omar Sandoval
2018-09-05 19:56             ` Bhupesh Sharma
2018-09-05 20:57           ` Andrew Morton
2018-09-05 22:00             ` Dominique Martinet
2018-07-18 22:58 ` [PATCH v3 8/8] proc/kcore: add vmcoreinfo note to /proc/kcore Omar Sandoval

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6106c509998779730c12400c1b996425df7d7089.1531953780.git.osandov@fb.com \
    --to=osandov@osandov.com \
    --cc=adobriyan@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=bhsharma@redhat.com \
    --cc=ebiederm@xmission.com \
    --cc=james.morse@arm.com \
    --cc=kernel-team@fb.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).