[PATCH v3] x86/gart/kcore: Exclude GART aperture from kcore

From: Kairui Song <kasong@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	x86@kernel.org, Alexey Dobriyan <adobriyan@gmail.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Omar Sandoval <osandov@fb.com>, Jiri Bohac <jbohac@suse.cz>,
	Baoquan He <bhe@redhat.com>, Dave Young <dyoung@redhat.com>,
	Kairui Song <kasong@redhat.com>
Subject: [PATCH v3] x86/gart/kcore: Exclude GART aperture from kcore
Date: Wed, 13 Feb 2019 16:28:00 +0800	[thread overview]
Message-ID: <20190213082800.4400-1-kasong@redhat.com> (raw)

On machines where the GART aperture is mapped over physical RAM,
/proc/kcore contains the GART aperture range and reading it may lead
to kernel panic.

In 'commit 2a3e83c6f96c ("x86/gart: Exclude GART aperture from vmcore")',
a workaround is applied for vmcore to let /proc/vmcore return zeroes
when attempting to read the GART region, as vmcore have the same issue,
and after 'commit 707d4eefbdb3 ("Revert "[PATCH] Insert GART region
into resource map"")', userspace tools won't be able to detect GART
region so have to avoid it from being reading in kernel.

This patch applies a similar workaround for kcore. Let /proc/kcore
return zeroes for GART aperture.

Both vmcore and kcore maintain a memory mapping list, in the vmcore
workaround we exclude the GART region by registering a hook for checking
if PFN is valid before reading, because vmcore's memory mapping could
be generated by userspace which doesn't know about GART. But for kcore
it will be simpler to just alter the memory area list, kcore's area list
is always generated by kernel on init.

Kcore's memory area list is generated very late so can't exclude the
overlapped area when GART is initialized, instead simply introduce a
new area enum type KCORE_NORAM, register GART aperture as KCORE_NORAM
and let kcore return zeros for all KCORE_NORAM area. This fixes the
problem well with minor code changes.

---
Update from V2:
Instead of repeating the same hook infrastructure for kcore, introduce
a new kcore area type to avoid reading from, and let kcore always bypass
this kind of area.

Update from V1:
Fix a complie error when CONFIG_PROC_KCORE is not set

 arch/x86/kernel/aperture_64.c | 14 ++++++++++++++
 fs/proc/kcore.c               | 13 +++++++++++++
 include/linux/kcore.h         |  1 +
 3 files changed, 28 insertions(+)

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 58176b56354e..5fb04bdd3221 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -31,6 +31,7 @@
 #include <asm/amd_nb.h>
 #include <asm/x86_init.h>
 #include <linux/crash_dump.h>
+#include <linux/kcore.h>
 
 /*
  * Using 512M as goal, in case kexec will load kernel_big
@@ -84,6 +85,17 @@ static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
 }
 #endif
 
+#ifdef CONFIG_PROC_KCORE
+static struct kcore_list kcore_gart;
+
+static void __init exclude_from_kcore(u64 aper_base, u32 aper_order) {
+	u32 aper_size = (32 * 1024 * 1024) << aper_order;
+	kclist_add(&kcore_gart, __va(aper_base), aper_size, KCORE_NORAM);
+}
+#else
+static inline void __init exclude_from_kcore(u64 aper_base, u32 aper_order) { }
+#endif
+
 /* This code runs before the PCI subsystem is initialized, so just
    access the northbridge directly. */
 
@@ -475,6 +487,7 @@ int __init gart_iommu_hole_init(void)
 			 * and fixed up the northbridge
 			 */
 			exclude_from_vmcore(last_aper_base, last_aper_order);
+			exclude_from_kcore(last_aper_base, last_aper_order);
 
 			return 1;
 		}
@@ -521,6 +534,7 @@ int __init gart_iommu_hole_init(void)
 	 * range through vmcore even though it should be part of the dump.
 	 */
 	exclude_from_vmcore(aper_alloc, aper_order);
+	exclude_from_kcore(aper_alloc, aper_order);
 
 	/* Fix up the north bridges */
 	for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index bbcc185062bb..15e0d74d7c56 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -75,6 +75,8 @@ static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
 	size = 0;
 
 	list_for_each_entry(m, &kclist_head, list) {
+		if (m->type == KCORE_NORAM)
+			continue;
 		try = kc_vaddr_to_offset((size_t)m->addr + m->size);
 		if (try > size)
 			size = try;
@@ -256,6 +258,9 @@ static int kcore_update_ram(void)
 	list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
 		if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP)
 			list_move(&pos->list, &garbage);
+		/* Move NORAM area to head of the new list */
+		if (pos->type == KCORE_NORAM)
+			list_move(&pos->list, &list);
 	}
 	list_splice_tail(&list, &kclist_head);
 
@@ -356,6 +361,8 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 
 		phdr = &phdrs[1];
 		list_for_each_entry(m, &kclist_head, list) {
+			if (m->type == KCORE_NORAM)
+				continue;
 			phdr->p_type = PT_LOAD;
 			phdr->p_flags = PF_R | PF_W | PF_X;
 			phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset;
@@ -465,6 +472,12 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 				goto out;
 			}
 			m = NULL;	/* skip the list anchor */
+		} else if (m->type == KCORE_NORAM) {
+			/* for NORAM area just fill zero */
+			if (clear_user(buffer, tsz)) {
+				ret = -EFAULT;
+				goto out;
+			}
 		} else if (m->type == KCORE_VMALLOC) {
 			vread(buf, (char *)start, tsz);
 			/* we have to zero-fill user buffer even if no read */
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 8c3f8c14eeaa..372a4093f794 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -13,6 +13,7 @@ enum kcore_type {
 	KCORE_USER,
 	KCORE_OTHER,
 	KCORE_REMAP,
+	KCORE_NORAM,
 };
 
 struct kcore_list {
-- 
2.20.1