[v2] x86/gart/kcore: Exclude GART aperture from kcore
diff mbox series

Message ID 20190102105408.7124-1-kasong@redhat.com
State New, archived
Headers show
Series
  • [v2] x86/gart/kcore: Exclude GART aperture from kcore
Related show

Commit Message

Kairui Song Jan. 2, 2019, 10:54 a.m. UTC
On machines where the GART aperture is mapped over physical RAM,
/proc/kcore contains the GART aperture range and reading it may lead
to kernel panic.

In 'commit 2a3e83c6f96c ("x86/gart: Exclude GART aperture from vmcore")',
a special workaround is applied for vmcore to let /proc/vmcore return
zeroes when attempting to read the aperture region, as vmcore and kcore
have the same issue, and after 'commit 707d4eefbdb3 ("Revert "[PATCH]
Insert GART region into resource map"")', userspace tools can't detect and
exclude GART region.

This patch applies the same workaround for kcore. Let /proc/kcore return
zeroes too when trying to read the aperture region to fix the issue that
reading GART region may raise unexpected exceptions.

This applies to both first and second kernels as GART may get
initialized in the first and second kernels.

To get the same workaround work for kcore, this patch implement a hook
infrastructure for kcore which is same as the hook infrastructure for
vmcore introduced in 'commit 997c136f518c ("fs/proc/vmcore.c: add hook
to read_from_oldmem() to check for non-ram pages")'', and reuses the
checking function gart_oldmem_pfn_is_ram introduced in
'commit 2a3e83c6f96c ("x86/gart: Exclude GART aperture from vmcore"),'
as the hook function, but rename to gart_mem_pfn_is_ram as now it's
for a more generic use.

Suggested-by: Baoquan He <bhe@redhat.com>
Signed-off-by: Kairui Song <kasong@redhat.com>
---
Update from V1:
- Fix a complie error when CONFIG_PROC_KCORE is not set

 arch/x86/kernel/aperture_64.c | 12 +++++++++---
 fs/proc/kcore.c               | 34 ++++++++++++++++++++++++++++++++++
 include/linux/kcore.h         |  3 +++
 3 files changed, 46 insertions(+), 3 deletions(-)

Comments

Baoquan He Jan. 23, 2019, 2:14 p.m. UTC | #1
On 01/02/19 at 06:54pm, Kairui Song wrote:
> diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
> index 58176b56354e..c8a56f083419 100644
> --- a/arch/x86/kernel/aperture_64.c
> +++ b/arch/x86/kernel/aperture_64.c
> @@ -14,6 +14,7 @@
>  #define pr_fmt(fmt) "AGP: " fmt
>  
>  #include <linux/kernel.h>
> +#include <linux/kcore.h>
>  #include <linux/types.h>
>  #include <linux/init.h>
>  #include <linux/memblock.h>
> @@ -57,7 +58,7 @@ int fallback_aper_force __initdata;
>  
>  int fix_aperture __initdata = 1;
>  
> -#ifdef CONFIG_PROC_VMCORE
> +#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE)
>  /*
>   * If the first kernel maps the aperture over e820 RAM, the kdump kernel will
>   * use the same range because it will remain configured in the northbridge.
> @@ -66,7 +67,7 @@ int fix_aperture __initdata = 1;
>   */
>  static unsigned long aperture_pfn_start, aperture_page_count;
>  
> -static int gart_oldmem_pfn_is_ram(unsigned long pfn)
> +static int gart_mem_pfn_is_ram(unsigned long pfn)
>  {
>  	return likely((pfn < aperture_pfn_start) ||
>  		      (pfn >= aperture_pfn_start + aperture_page_count));
> @@ -76,7 +77,12 @@ static void exclude_from_vmcore(u64 aper_base, u32 aper_order)

Shouldn't this function name be changed? It's not only handling vmcore
stuff any more, but also kcore. And this function is not excluding, but
resgistering.

Other than this, it looks good to me.

Thanks
Baoquan

>  {
>  	aperture_pfn_start = aper_base >> PAGE_SHIFT;
>  	aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
> -	WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram));
> +#ifdef CONFIG_PROC_VMCORE
> +	WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
> +#endif
> +#ifdef CONFIG_PROC_KCORE
> +	WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
> +#endif
>  }
>  #else
>  static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
Kairui Song Jan. 23, 2019, 2:50 p.m. UTC | #2
On Wed, Jan 23, 2019 at 10:14 PM Baoquan He <bhe@redhat.com> wrote:
>
> On 01/02/19 at 06:54pm, Kairui Song wrote:
> > diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
> > index 58176b56354e..c8a56f083419 100644
> > --- a/arch/x86/kernel/aperture_64.c
> > +++ b/arch/x86/kernel/aperture_64.c
> > @@ -14,6 +14,7 @@
> >  #define pr_fmt(fmt) "AGP: " fmt
> >
> >  #include <linux/kernel.h>
> > +#include <linux/kcore.h>
> >  #include <linux/types.h>
> >  #include <linux/init.h>
> >  #include <linux/memblock.h>
> > @@ -57,7 +58,7 @@ int fallback_aper_force __initdata;
> >
> >  int fix_aperture __initdata = 1;
> >
> > -#ifdef CONFIG_PROC_VMCORE
> > +#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE)
> >  /*
> >   * If the first kernel maps the aperture over e820 RAM, the kdump kernel will
> >   * use the same range because it will remain configured in the northbridge.
> > @@ -66,7 +67,7 @@ int fix_aperture __initdata = 1;
> >   */
> >  static unsigned long aperture_pfn_start, aperture_page_count;
> >
> > -static int gart_oldmem_pfn_is_ram(unsigned long pfn)
> > +static int gart_mem_pfn_is_ram(unsigned long pfn)
> >  {
> >       return likely((pfn < aperture_pfn_start) ||
> >                     (pfn >= aperture_pfn_start + aperture_page_count));
> > @@ -76,7 +77,12 @@ static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
>
> Shouldn't this function name be changed? It's not only handling vmcore
> stuff any more, but also kcore. And this function is not excluding, but
> resgistering.
>
> Other than this, it looks good to me.
>
> Thanks
> Baoquan
>

Good suggestion, it's good to change this function name too to avoid
any misleading. This patch hasn't got any other reviews recently, I'll
update it shortly.
Baoquan He Jan. 24, 2019, 2:17 a.m. UTC | #3
On 01/23/19 at 10:50pm, Kairui Song wrote:
> > >  int fix_aperture __initdata = 1;
> > >
> > > -#ifdef CONFIG_PROC_VMCORE
> > > +#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE)
> > >  /*
> > >   * If the first kernel maps the aperture over e820 RAM, the kdump kernel will
> > >   * use the same range because it will remain configured in the northbridge.
> > > @@ -66,7 +67,7 @@ int fix_aperture __initdata = 1;
> > >   */
> > >  static unsigned long aperture_pfn_start, aperture_page_count;
> > >
> > > -static int gart_oldmem_pfn_is_ram(unsigned long pfn)
> > > +static int gart_mem_pfn_is_ram(unsigned long pfn)
> > >  {
> > >       return likely((pfn < aperture_pfn_start) ||
> > >                     (pfn >= aperture_pfn_start + aperture_page_count));
> > > @@ -76,7 +77,12 @@ static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
> >
> > Shouldn't this function name be changed? It's not only handling vmcore
> > stuff any more, but also kcore. And this function is not excluding, but
> > resgistering.
> >
> > Other than this, it looks good to me.
> >
> > Thanks
> > Baoquan
> >
> 
> Good suggestion, it's good to change this function name too to avoid
> any misleading. This patch hasn't got any other reviews recently, I'll
> update it shortly.

There's more.

These two are doing the same thing:
  register_mem_pfn_is_ram
  register_oldmem_pfn_is_ram

Need remove one of them and put it in a right place. Furthermore, may
need see if there's existing function which is used to register a
function to a hook.

Secondly, exclude_from_vmcore() is not excluding anthing, it's only
registering a function which is used to judge if oldmem/pfn is ram. Need
rename it.

Thanks
Baoquan
Kairui Song Feb. 19, 2019, 8 a.m. UTC | #4
On Thu, Jan 24, 2019 at 10:17 AM Baoquan He <bhe@redhat.com> wrote:
>
> On 01/23/19 at 10:50pm, Kairui Song wrote:
> > > >  int fix_aperture __initdata = 1;
> > > >
> > > > -#ifdef CONFIG_PROC_VMCORE
> > > > +#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE)
> > > >  /*
> > > >   * If the first kernel maps the aperture over e820 RAM, the kdump kernel will
> > > >   * use the same range because it will remain configured in the northbridge.
> > > > @@ -66,7 +67,7 @@ int fix_aperture __initdata = 1;
> > > >   */
> > > >  static unsigned long aperture_pfn_start, aperture_page_count;
> > > >
> > > > -static int gart_oldmem_pfn_is_ram(unsigned long pfn)
> > > > +static int gart_mem_pfn_is_ram(unsigned long pfn)
> > > >  {
> > > >       return likely((pfn < aperture_pfn_start) ||
> > > >                     (pfn >= aperture_pfn_start + aperture_page_count));
> > > > @@ -76,7 +77,12 @@ static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
> > >
> > > Shouldn't this function name be changed? It's not only handling vmcore
> > > stuff any more, but also kcore. And this function is not excluding, but
> > > resgistering.
> > >
> > > Other than this, it looks good to me.
> > >
> > > Thanks
> > > Baoquan
> > >
> >
> > Good suggestion, it's good to change this function name too to avoid
> > any misleading. This patch hasn't got any other reviews recently, I'll
> > update it shortly.
>
> There's more.
>
> These two are doing the same thing:
>   register_mem_pfn_is_ram
>   register_oldmem_pfn_is_ram
>
> Need remove one of them and put it in a right place. Furthermore, may
> need see if there's existing function which is used to register a
> function to a hook.
>
> Secondly, exclude_from_vmcore() is not excluding anthing, it's only
> registering a function which is used to judge if oldmem/pfn is ram. Need
> rename it.
>
> Thanks
> Baoquan

Thanks a lot for the review! I've sent V3, using a different approach.
It's true repeating the hook infrastructure cause duplication, but I
see vmcore/kcore didn't share much code, so instead of sharing a
common hook infrastructure / registering entry, I used a new kcore
memory mapping list enum type to fix it, it also introduced less code.
Please have a look at V3, let me know how you think about it, thanks!


--
Best Regards,
Kairui Song
Kairui Song March 6, 2019, 8:48 a.m. UTC | #5
On Tue, Feb 19, 2019 at 4:00 PM Kairui Song <kasong@redhat.com> wrote:
>
> On Thu, Jan 24, 2019 at 10:17 AM Baoquan He <bhe@redhat.com> wrote:
> >
> > On 01/23/19 at 10:50pm, Kairui Song wrote:
> > > > >  int fix_aperture __initdata = 1;
> > > > >
> > > > > -#ifdef CONFIG_PROC_VMCORE
> > > > > +#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE)
> > > > >  /*
> > > > >   * If the first kernel maps the aperture over e820 RAM, the kdump kernel will
> > > > >   * use the same range because it will remain configured in the northbridge.
> > > > > @@ -66,7 +67,7 @@ int fix_aperture __initdata = 1;
> > > > >   */
> > > > >  static unsigned long aperture_pfn_start, aperture_page_count;
> > > > >
> > > > > -static int gart_oldmem_pfn_is_ram(unsigned long pfn)
> > > > > +static int gart_mem_pfn_is_ram(unsigned long pfn)
> > > > >  {
> > > > >       return likely((pfn < aperture_pfn_start) ||
> > > > >                     (pfn >= aperture_pfn_start + aperture_page_count));
> > > > > @@ -76,7 +77,12 @@ static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
> > > >
> > > > Shouldn't this function name be changed? It's not only handling vmcore
> > > > stuff any more, but also kcore. And this function is not excluding, but
> > > > resgistering.
> > > >
> > > > Other than this, it looks good to me.
> > > >
> > > > Thanks
> > > > Baoquan
> > > >
> > >
> > > Good suggestion, it's good to change this function name too to avoid
> > > any misleading. This patch hasn't got any other reviews recently, I'll
> > > update it shortly.
> >
> > There's more.
> >
> > These two are doing the same thing:
> >   register_mem_pfn_is_ram
> >   register_oldmem_pfn_is_ram
> >
> > Need remove one of them and put it in a right place. Furthermore, may
> > need see if there's existing function which is used to register a
> > function to a hook.
> >
> > Secondly, exclude_from_vmcore() is not excluding anthing, it's only
> > registering a function which is used to judge if oldmem/pfn is ram. Need
> > rename it.
> >
> > Thanks
> > Baoquan
>

Hi Baoquan, after second thought, vmcore and kcore are doing similar
thing but still quite independent of each, didn't see any simple way
to share the logic.
And for the following naming issue I think considering the context
there is no problem, "exclude_from_vmcore(aper_alloc, aper_order)" is
clearly doing what it literally means, excluding the aperture from
vmcore.

Let me know if anything is wrong, will send V4 later reuse this approach.

--
Best Regards,
Kairui Song
Baoquan He March 6, 2019, 9:03 a.m. UTC | #6
On 03/06/19 at 04:48pm, Kairui Song wrote:
> On Tue, Feb 19, 2019 at 4:00 PM Kairui Song <kasong@redhat.com> wrote:
> Hi Baoquan, after second thought, vmcore and kcore are doing similar
> thing but still quite independent of each, didn't see any simple way
> to share the logic.
> And for the following naming issue I think considering the context
> there is no problem, "exclude_from_vmcore(aper_alloc, aper_order)" is
> clearly doing what it literally means, excluding the aperture from
> vmcore.
> 
> Let me know if anything is wrong, will send V4 later reuse this approach.

Thanks for the effort.

Yes, saw your v3. I kept quiet because I personally prefer the v2 method
which is similar to vmcore handling. So if you have investigated and
decided to do v2 way after deliberate thought, feel free to post v4. I
will make time to review.

Thanks
Baoquan

Patch
diff mbox series

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 58176b56354e..c8a56f083419 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -14,6 +14,7 @@ 
 #define pr_fmt(fmt) "AGP: " fmt
 
 #include <linux/kernel.h>
+#include <linux/kcore.h>
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/memblock.h>
@@ -57,7 +58,7 @@  int fallback_aper_force __initdata;
 
 int fix_aperture __initdata = 1;
 
-#ifdef CONFIG_PROC_VMCORE
+#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE)
 /*
  * If the first kernel maps the aperture over e820 RAM, the kdump kernel will
  * use the same range because it will remain configured in the northbridge.
@@ -66,7 +67,7 @@  int fix_aperture __initdata = 1;
  */
 static unsigned long aperture_pfn_start, aperture_page_count;
 
-static int gart_oldmem_pfn_is_ram(unsigned long pfn)
+static int gart_mem_pfn_is_ram(unsigned long pfn)
 {
 	return likely((pfn < aperture_pfn_start) ||
 		      (pfn >= aperture_pfn_start + aperture_page_count));
@@ -76,7 +77,12 @@  static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
 {
 	aperture_pfn_start = aper_base >> PAGE_SHIFT;
 	aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
-	WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram));
+#ifdef CONFIG_PROC_VMCORE
+	WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
+#endif
+#ifdef CONFIG_PROC_KCORE
+	WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
+#endif
 }
 #else
 static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index bbcc185062bb..24e13fcc31ea 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -54,6 +54,35 @@  static LIST_HEAD(kclist_head);
 static DECLARE_RWSEM(kclist_lock);
 static int kcore_need_update = 1;
 
+static int (*mem_pfn_is_ram)(unsigned long pfn);
+
+int register_mem_pfn_is_ram(int (*fn)(unsigned long pfn))
+{
+	if (mem_pfn_is_ram)
+		return -EBUSY;
+	mem_pfn_is_ram = fn;
+	return 0;
+}
+
+void unregister_mem_pfn_is_ram(void)
+{
+	mem_pfn_is_ram = NULL;
+	wmb();
+}
+
+static int pfn_is_ram(unsigned long pfn)
+{
+	int (*fn)(unsigned long pfn);
+	/* pfn is ram unless fn() checks pagetype */
+	int ret = 1;
+
+	fn = mem_pfn_is_ram;
+	if (fn)
+		ret = fn(pfn);
+
+	return ret;
+}
+
 /* This doesn't grab kclist_lock, so it should only be used at init time. */
 void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
 		       int type)
@@ -465,6 +494,11 @@  read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 				goto out;
 			}
 			m = NULL;	/* skip the list anchor */
+		} else if (!pfn_is_ram(__pa(start) >> PAGE_SHIFT)) {
+			if (clear_user(buffer, tsz)) {
+				ret = -EFAULT;
+				goto out;
+			}
 		} else if (m->type == KCORE_VMALLOC) {
 			vread(buf, (char *)start, tsz);
 			/* we have to zero-fill user buffer even if no read */
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 8c3f8c14eeaa..2130a35f883b 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -44,6 +44,9 @@  void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
 	m->vaddr = (unsigned long)vaddr;
 	kclist_add(m, addr, sz, KCORE_REMAP);
 }
+
+extern int register_mem_pfn_is_ram(int (*fn)(unsigned long pfn));
+extern void unregister_mem_pfn_is_ram(void);
 #else
 static inline
 void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)