All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v5] x86/gart/kcore: Exclude GART aperture from kcore
@ 2019-03-08  3:05 Kairui Song
  2019-03-08 12:06 ` Jiri Bohac
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Kairui Song @ 2019-03-08  3:05 UTC (permalink / raw)
  To: linux-kernel
  Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
	x86, Alexey Dobriyan, Andrew Morton, Omar Sandoval, Jiri Bohac,
	Baoquan He, Dave Young, Kairui Song

On machines where the GART aperture is mapped over physical RAM,
/proc/kcore contains the GART aperture range and reading it may lead
to kernel panic.

Vmcore used to have the same issue, until we fixed it in
commit 2a3e83c6f96c ("x86/gart: Exclude GART aperture from vmcore"),
leveraging existing hook infrastructure in vmcore to let /proc/vmcore
return zeroes when attempting to read the aperture region, and so it
won't read from the actual memory.

We apply the same workaround for kcore. First implement the same hook
infrastructure for kcore, then reuse the hook functions introduced in
previous vmcore fix. Just with some minor adjustment, rename some
functions for more general usage, and simplify the hook infrastructure
a bit as there is no module usage yet.

Suggested-by: Baoquan He <bhe@redhat.com>
Signed-off-by: Kairui Song <kasong@redhat.com>

---

Update from V4:
- Remove the unregistering function and move functions never used after
  init to .init

Update from V3:
- Reuse the approach in V2, as Jiri noticed the V3 approach may fail in
  some use cases. It introduces overlapping regions in kcore, and can't
  guarantee the read request will fall into the region we wanted.
- Improve some function naming suggested by Baoquan in V2.
- Simplify the hook registering and checking, we are not exporting the
  hook register function for now, no need to make it that complex.

Update from V2:
Instead of repeating the same hook infrastructure for kcore, introduce
a new kcore area type to avoid reading from, and let kcore always bypass
this kind of area.

Update from V1:
Fix a compile error when CONFIG_PROC_KCORE is not set

 arch/x86/kernel/aperture_64.c | 20 +++++++++++++-------
 fs/proc/kcore.c               | 27 +++++++++++++++++++++++++++
 include/linux/kcore.h         |  2 ++
 3 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 58176b56354e..294ed4392a0e 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -14,6 +14,7 @@
 #define pr_fmt(fmt) "AGP: " fmt
 
 #include <linux/kernel.h>
+#include <linux/kcore.h>
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/memblock.h>
@@ -57,7 +58,7 @@ int fallback_aper_force __initdata;
 
 int fix_aperture __initdata = 1;
 
-#ifdef CONFIG_PROC_VMCORE
+#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE)
 /*
  * If the first kernel maps the aperture over e820 RAM, the kdump kernel will
  * use the same range because it will remain configured in the northbridge.
@@ -66,20 +67,25 @@ int fix_aperture __initdata = 1;
  */
 static unsigned long aperture_pfn_start, aperture_page_count;
 
-static int gart_oldmem_pfn_is_ram(unsigned long pfn)
+static int gart_mem_pfn_is_ram(unsigned long pfn)
 {
 	return likely((pfn < aperture_pfn_start) ||
 		      (pfn >= aperture_pfn_start + aperture_page_count));
 }
 
-static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
+static void __init exclude_from_core(u64 aper_base, u32 aper_order)
 {
 	aperture_pfn_start = aper_base >> PAGE_SHIFT;
 	aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
-	WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram));
+#ifdef CONFIG_PROC_VMCORE
+	WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
+#endif
+#ifdef CONFIG_PROC_KCORE
+	WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
+#endif
 }
 #else
-static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
+static void exclude_from_core(u64 aper_base, u32 aper_order)
 {
 }
 #endif
@@ -474,7 +480,7 @@ int __init gart_iommu_hole_init(void)
 			 * may have allocated the range over its e820 RAM
 			 * and fixed up the northbridge
 			 */
-			exclude_from_vmcore(last_aper_base, last_aper_order);
+			exclude_from_core(last_aper_base, last_aper_order);
 
 			return 1;
 		}
@@ -520,7 +526,7 @@ int __init gart_iommu_hole_init(void)
 	 * overlap with the first kernel's memory. We can't access the
 	 * range through vmcore even though it should be part of the dump.
 	 */
-	exclude_from_vmcore(aper_alloc, aper_order);
+	exclude_from_core(aper_alloc, aper_order);
 
 	/* Fix up the north bridges */
 	for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index bbcc185062bb..d29d869abec1 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -54,6 +54,28 @@ static LIST_HEAD(kclist_head);
 static DECLARE_RWSEM(kclist_lock);
 static int kcore_need_update = 1;
 
+/*
+ * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
+ * Same as oldmem_pfn_is_ram in vmcore
+ */
+static int (*mem_pfn_is_ram)(unsigned long pfn);
+
+int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn))
+{
+	if (mem_pfn_is_ram)
+		return -EBUSY;
+	mem_pfn_is_ram = fn;
+	return 0;
+}
+
+static int pfn_is_ram(unsigned long pfn)
+{
+	if (mem_pfn_is_ram)
+		return mem_pfn_is_ram(pfn);
+	else
+		return 1;
+}
+
 /* This doesn't grab kclist_lock, so it should only be used at init time. */
 void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
 		       int type)
@@ -465,6 +487,11 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 				goto out;
 			}
 			m = NULL;	/* skip the list anchor */
+		} else if (!pfn_is_ram(__pa(start) >> PAGE_SHIFT)) {
+			if (clear_user(buffer, tsz)) {
+				ret = -EFAULT;
+				goto out;
+			}
 		} else if (m->type == KCORE_VMALLOC) {
 			vread(buf, (char *)start, tsz);
 			/* we have to zero-fill user buffer even if no read */
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 8c3f8c14eeaa..c843f4a9c512 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -44,6 +44,8 @@ void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
 	m->vaddr = (unsigned long)vaddr;
 	kclist_add(m, addr, sz, KCORE_REMAP);
 }
+
+extern int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn));
 #else
 static inline
 void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v5] x86/gart/kcore: Exclude GART aperture from kcore
  2019-03-08  3:05 [PATCH v5] x86/gart/kcore: Exclude GART aperture from kcore Kairui Song
@ 2019-03-08 12:06 ` Jiri Bohac
  2019-03-11  0:02 ` Baoquan He
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Jiri Bohac @ 2019-03-08 12:06 UTC (permalink / raw)
  To: Kairui Song
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	H. Peter Anvin, x86, Alexey Dobriyan, Andrew Morton,
	Omar Sandoval, Baoquan He, Dave Young

On Fri, Mar 08, 2019 at 11:05:08AM +0800, Kairui Song wrote:
> Suggested-by: Baoquan He <bhe@redhat.com>
> Signed-off-by: Kairui Song <kasong@redhat.com>

Reviewed-by: Jiri Bohac <jbohac@suse.cz>

Thanks,

-- 
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, Prague, Czechia


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v5] x86/gart/kcore: Exclude GART aperture from kcore
  2019-03-08  3:05 [PATCH v5] x86/gart/kcore: Exclude GART aperture from kcore Kairui Song
  2019-03-08 12:06 ` Jiri Bohac
@ 2019-03-11  0:02 ` Baoquan He
  2019-03-22  2:02 ` Kairui Song
  2019-03-23 11:16 ` [tip:x86/urgent] x86/gart: " tip-bot for Kairui Song
  3 siblings, 0 replies; 5+ messages in thread
From: Baoquan He @ 2019-03-11  0:02 UTC (permalink / raw)
  To: Kairui Song
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	H. Peter Anvin, x86, Alexey Dobriyan, Andrew Morton,
	Omar Sandoval, Jiri Bohac, Dave Young

On 03/08/19 at 11:05am, Kairui Song wrote:
> On machines where the GART aperture is mapped over physical RAM,
> /proc/kcore contains the GART aperture range and reading it may lead
> to kernel panic.
> 
> Vmcore used to have the same issue, until we fixed it in
> commit 2a3e83c6f96c ("x86/gart: Exclude GART aperture from vmcore")',
> leveraging existing hook infrastructure in vmcore to let /proc/vmcore
> return zeroes when attempting to read the aperture region, and so it
> won't read from the actual memory.
> 
> We apply the same workaround for kcore. First implement the same hook
> infrastructure for kcore, then reuse the hook functions introduced in
> previous vmcore fix. Just with some minor adjustment, rename some
> functions for more general usage, and simplify the hook infrastructure
> a bit as there is no module usage yet.
> 
> Suggested-by: Baoquan He <bhe@redhat.com>
> Signed-off-by: Kairui Song <kasong@redhat.com>
> 
> ---
> 

Looks good to me, thanks for the effort.

Acked-by: Baoquan He <bhe@redhat.com>

Thanks
Baoquan


> Update from V4:
> - Remove the unregistering funtion and move functions never used after
>   init to .init
> 
> Update from V3:
> - Reuse the approach in V2, as Jiri noticed V3 approach may fail
>   some use case. It introduce overlapped region in kcore, and can't
>   garenteen the read request will fall into the region we wanted.
> - Improve some function naming suggested by Baoquan in V2.
> - Simplify the hook registering and checking, we are not exporting the
>   hook register function for now, no need to make it that complex.
> 
> Update from V2:
> Instead of repeating the same hook infrastructure for kcore, introduce
> a new kcore area type to avoid reading from, and let kcore always bypass
> this kind of area.
> 
> Update from V1:
> Fix a complie error when CONFIG_PROC_KCORE is not set
> 
>  arch/x86/kernel/aperture_64.c | 20 +++++++++++++-------
>  fs/proc/kcore.c               | 27 +++++++++++++++++++++++++++
>  include/linux/kcore.h         |  2 ++
>  3 files changed, 42 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
> index 58176b56354e..294ed4392a0e 100644
> --- a/arch/x86/kernel/aperture_64.c
> +++ b/arch/x86/kernel/aperture_64.c
> @@ -14,6 +14,7 @@
>  #define pr_fmt(fmt) "AGP: " fmt
>  
>  #include <linux/kernel.h>
> +#include <linux/kcore.h>
>  #include <linux/types.h>
>  #include <linux/init.h>
>  #include <linux/memblock.h>
> @@ -57,7 +58,7 @@ int fallback_aper_force __initdata;
>  
>  int fix_aperture __initdata = 1;
>  
> -#ifdef CONFIG_PROC_VMCORE
> +#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE)
>  /*
>   * If the first kernel maps the aperture over e820 RAM, the kdump kernel will
>   * use the same range because it will remain configured in the northbridge.
> @@ -66,20 +67,25 @@ int fix_aperture __initdata = 1;
>   */
>  static unsigned long aperture_pfn_start, aperture_page_count;
>  
> -static int gart_oldmem_pfn_is_ram(unsigned long pfn)
> +static int gart_mem_pfn_is_ram(unsigned long pfn)
>  {
>  	return likely((pfn < aperture_pfn_start) ||
>  		      (pfn >= aperture_pfn_start + aperture_page_count));
>  }
>  
> -static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
> +static void __init exclude_from_core(u64 aper_base, u32 aper_order)
>  {
>  	aperture_pfn_start = aper_base >> PAGE_SHIFT;
>  	aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
> -	WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram));
> +#ifdef CONFIG_PROC_VMCORE
> +	WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
> +#endif
> +#ifdef CONFIG_PROC_KCORE
> +	WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
> +#endif
>  }
>  #else
> -static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
> +static void exclude_from_core(u64 aper_base, u32 aper_order)
>  {
>  }
>  #endif
> @@ -474,7 +480,7 @@ int __init gart_iommu_hole_init(void)
>  			 * may have allocated the range over its e820 RAM
>  			 * and fixed up the northbridge
>  			 */
> -			exclude_from_vmcore(last_aper_base, last_aper_order);
> +			exclude_from_core(last_aper_base, last_aper_order);
>  
>  			return 1;
>  		}
> @@ -520,7 +526,7 @@ int __init gart_iommu_hole_init(void)
>  	 * overlap with the first kernel's memory. We can't access the
>  	 * range through vmcore even though it should be part of the dump.
>  	 */
> -	exclude_from_vmcore(aper_alloc, aper_order);
> +	exclude_from_core(aper_alloc, aper_order);
>  
>  	/* Fix up the north bridges */
>  	for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
> diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
> index bbcc185062bb..d29d869abec1 100644
> --- a/fs/proc/kcore.c
> +++ b/fs/proc/kcore.c
> @@ -54,6 +54,28 @@ static LIST_HEAD(kclist_head);
>  static DECLARE_RWSEM(kclist_lock);
>  static int kcore_need_update = 1;
>  
> +/*
> + * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
> + * Same as oldmem_pfn_is_ram in vmcore
> + */
> +static int (*mem_pfn_is_ram)(unsigned long pfn);
> +
> +int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn))
> +{
> +	if (mem_pfn_is_ram)
> +		return -EBUSY;
> +	mem_pfn_is_ram = fn;
> +	return 0;
> +}
> +
> +static int pfn_is_ram(unsigned long pfn)
> +{
> +	if (mem_pfn_is_ram)
> +		return mem_pfn_is_ram(pfn);
> +	else
> +		return 1;
> +}
> +
>  /* This doesn't grab kclist_lock, so it should only be used at init time. */
>  void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
>  		       int type)
> @@ -465,6 +487,11 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
>  				goto out;
>  			}
>  			m = NULL;	/* skip the list anchor */
> +		} else if (!pfn_is_ram(__pa(start) >> PAGE_SHIFT)) {
> +			if (clear_user(buffer, tsz)) {
> +				ret = -EFAULT;
> +				goto out;
> +			}
>  		} else if (m->type == KCORE_VMALLOC) {
>  			vread(buf, (char *)start, tsz);
>  			/* we have to zero-fill user buffer even if no read */
> diff --git a/include/linux/kcore.h b/include/linux/kcore.h
> index 8c3f8c14eeaa..c843f4a9c512 100644
> --- a/include/linux/kcore.h
> +++ b/include/linux/kcore.h
> @@ -44,6 +44,8 @@ void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
>  	m->vaddr = (unsigned long)vaddr;
>  	kclist_add(m, addr, sz, KCORE_REMAP);
>  }
> +
> +extern int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn));
>  #else
>  static inline
>  void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v5] x86/gart/kcore: Exclude GART aperture from kcore
  2019-03-08  3:05 [PATCH v5] x86/gart/kcore: Exclude GART aperture from kcore Kairui Song
  2019-03-08 12:06 ` Jiri Bohac
  2019-03-11  0:02 ` Baoquan He
@ 2019-03-22  2:02 ` Kairui Song
  2019-03-23 11:16 ` [tip:x86/urgent] x86/gart: " tip-bot for Kairui Song
  3 siblings, 0 replies; 5+ messages in thread
From: Kairui Song @ 2019-03-22  2:02 UTC (permalink / raw)
  To: Linux Kernel Mailing List
  Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
	the arch/x86 maintainers, Alexey Dobriyan, Andrew Morton,
	Omar Sandoval, Jiri Bohac, Baoquan He, Dave Young

On Fri, Mar 8, 2019 at 11:06 AM Kairui Song <kasong@redhat.com> wrote:
>
> On machines where the GART aperture is mapped over physical RAM,
> /proc/kcore contains the GART aperture range and reading it may lead
> to kernel panic.
>
> Vmcore used to have the same issue, until we fixed it in
> commit 2a3e83c6f96c ("x86/gart: Exclude GART aperture from vmcore")',
> leveraging existing hook infrastructure in vmcore to let /proc/vmcore
> return zeroes when attempting to read the aperture region, and so it
> won't read from the actual memory.
>
> We apply the same workaround for kcore. First implement the same hook
> infrastructure for kcore, then reuse the hook functions introduced in
> previous vmcore fix. Just with some minor adjustment, rename some
> functions for more general usage, and simplify the hook infrastructure
> a bit as there is no module usage yet.
>
> Suggested-by: Baoquan He <bhe@redhat.com>
> Signed-off-by: Kairui Song <kasong@redhat.com>
>
> ---
>
> Update from V4:
> - Remove the unregistering funtion and move functions never used after
>   init to .init
>
> Update from V3:
> - Reuse the approach in V2, as Jiri noticed V3 approach may fail
>   some use case. It introduce overlapped region in kcore, and can't
>   garenteen the read request will fall into the region we wanted.
> - Improve some function naming suggested by Baoquan in V2.
> - Simplify the hook registering and checking, we are not exporting the
>   hook register function for now, no need to make it that complex.
>
> Update from V2:
> Instead of repeating the same hook infrastructure for kcore, introduce
> a new kcore area type to avoid reading from, and let kcore always bypass
> this kind of area.
>
> Update from V1:
> Fix a complie error when CONFIG_PROC_KCORE is not set
>
>  arch/x86/kernel/aperture_64.c | 20 +++++++++++++-------
>  fs/proc/kcore.c               | 27 +++++++++++++++++++++++++++
>  include/linux/kcore.h         |  2 ++
>  3 files changed, 42 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
> index 58176b56354e..294ed4392a0e 100644
> --- a/arch/x86/kernel/aperture_64.c
> +++ b/arch/x86/kernel/aperture_64.c
> @@ -14,6 +14,7 @@
>  #define pr_fmt(fmt) "AGP: " fmt
>
>  #include <linux/kernel.h>
> +#include <linux/kcore.h>
>  #include <linux/types.h>
>  #include <linux/init.h>
>  #include <linux/memblock.h>
> @@ -57,7 +58,7 @@ int fallback_aper_force __initdata;
>
>  int fix_aperture __initdata = 1;
>
> -#ifdef CONFIG_PROC_VMCORE
> +#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE)
>  /*
>   * If the first kernel maps the aperture over e820 RAM, the kdump kernel will
>   * use the same range because it will remain configured in the northbridge.
> @@ -66,20 +67,25 @@ int fix_aperture __initdata = 1;
>   */
>  static unsigned long aperture_pfn_start, aperture_page_count;
>
> -static int gart_oldmem_pfn_is_ram(unsigned long pfn)
> +static int gart_mem_pfn_is_ram(unsigned long pfn)
>  {
>         return likely((pfn < aperture_pfn_start) ||
>                       (pfn >= aperture_pfn_start + aperture_page_count));
>  }
>
> -static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
> +static void __init exclude_from_core(u64 aper_base, u32 aper_order)
>  {
>         aperture_pfn_start = aper_base >> PAGE_SHIFT;
>         aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
> -       WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram));
> +#ifdef CONFIG_PROC_VMCORE
> +       WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
> +#endif
> +#ifdef CONFIG_PROC_KCORE
> +       WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
> +#endif
>  }
>  #else
> -static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
> +static void exclude_from_core(u64 aper_base, u32 aper_order)
>  {
>  }
>  #endif
> @@ -474,7 +480,7 @@ int __init gart_iommu_hole_init(void)
>                          * may have allocated the range over its e820 RAM
>                          * and fixed up the northbridge
>                          */
> -                       exclude_from_vmcore(last_aper_base, last_aper_order);
> +                       exclude_from_core(last_aper_base, last_aper_order);
>
>                         return 1;
>                 }
> @@ -520,7 +526,7 @@ int __init gart_iommu_hole_init(void)
>          * overlap with the first kernel's memory. We can't access the
>          * range through vmcore even though it should be part of the dump.
>          */
> -       exclude_from_vmcore(aper_alloc, aper_order);
> +       exclude_from_core(aper_alloc, aper_order);
>
>         /* Fix up the north bridges */
>         for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
> diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
> index bbcc185062bb..d29d869abec1 100644
> --- a/fs/proc/kcore.c
> +++ b/fs/proc/kcore.c
> @@ -54,6 +54,28 @@ static LIST_HEAD(kclist_head);
>  static DECLARE_RWSEM(kclist_lock);
>  static int kcore_need_update = 1;
>
> +/*
> + * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
> + * Same as oldmem_pfn_is_ram in vmcore
> + */
> +static int (*mem_pfn_is_ram)(unsigned long pfn);
> +
> +int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn))
> +{
> +       if (mem_pfn_is_ram)
> +               return -EBUSY;
> +       mem_pfn_is_ram = fn;
> +       return 0;
> +}
> +
> +static int pfn_is_ram(unsigned long pfn)
> +{
> +       if (mem_pfn_is_ram)
> +               return mem_pfn_is_ram(pfn);
> +       else
> +               return 1;
> +}
> +
>  /* This doesn't grab kclist_lock, so it should only be used at init time. */
>  void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
>                        int type)
> @@ -465,6 +487,11 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
>                                 goto out;
>                         }
>                         m = NULL;       /* skip the list anchor */
> +               } else if (!pfn_is_ram(__pa(start) >> PAGE_SHIFT)) {
> +                       if (clear_user(buffer, tsz)) {
> +                               ret = -EFAULT;
> +                               goto out;
> +                       }
>                 } else if (m->type == KCORE_VMALLOC) {
>                         vread(buf, (char *)start, tsz);
>                         /* we have to zero-fill user buffer even if no read */
> diff --git a/include/linux/kcore.h b/include/linux/kcore.h
> index 8c3f8c14eeaa..c843f4a9c512 100644
> --- a/include/linux/kcore.h
> +++ b/include/linux/kcore.h
> @@ -44,6 +44,8 @@ void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
>         m->vaddr = (unsigned long)vaddr;
>         kclist_add(m, addr, sz, KCORE_REMAP);
>  }
> +
> +extern int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn));
>  #else
>  static inline
>  void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
> --
> 2.20.1
>

Ping? Can any one help to take this patch?

--
Best Regards,
Kairui Song

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [tip:x86/urgent] x86/gart: Exclude GART aperture from kcore
  2019-03-08  3:05 [PATCH v5] x86/gart/kcore: Exclude GART aperture from kcore Kairui Song
                   ` (2 preceding siblings ...)
  2019-03-22  2:02 ` Kairui Song
@ 2019-03-23 11:16 ` tip-bot for Kairui Song
  3 siblings, 0 replies; 5+ messages in thread
From: tip-bot for Kairui Song @ 2019-03-23 11:16 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: dyoung, mingo, bp, jbohac, linux-kernel, tglx, adobriyan, kasong,
	bhe, hpa, akpm, osandov

Commit-ID:  ffc8599aa9763f39f6736a79da4d1575e7006f9a
Gitweb:     https://git.kernel.org/tip/ffc8599aa9763f39f6736a79da4d1575e7006f9a
Author:     Kairui Song <kasong@redhat.com>
AuthorDate: Fri, 8 Mar 2019 11:05:08 +0800
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Sat, 23 Mar 2019 12:11:49 +0100

x86/gart: Exclude GART aperture from kcore

On machines where the GART aperture is mapped over physical RAM,
/proc/kcore contains the GART aperture range. Accessing the GART range via
/proc/kcore results in a kernel crash.

vmcore used to have the same issue, until it was fixed with commit
2a3e83c6f96c ("x86/gart: Exclude GART aperture from vmcore"), leveraging
existing hook infrastructure in vmcore to let /proc/vmcore return zeroes
when attempting to read the aperture region, and so it won't read from the
actual memory.

Apply the same workaround for kcore. First implement the same hook
infrastructure for kcore, then reuse the hook functions introduced in the
previous vmcore fix. Just with some minor adjustment, rename some functions
for more general usage, and simplify the hook infrastructure a bit as there
is no module usage yet.

Suggested-by: Baoquan He <bhe@redhat.com>
Signed-off-by: Kairui Song <kasong@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Jiri Bohac <jbohac@suse.cz>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Dave Young <dyoung@redhat.com>
Link: https://lkml.kernel.org/r/20190308030508.13548-1-kasong@redhat.com


---
 arch/x86/kernel/aperture_64.c | 20 +++++++++++++-------
 fs/proc/kcore.c               | 27 +++++++++++++++++++++++++++
 include/linux/kcore.h         |  2 ++
 3 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 58176b56354e..294ed4392a0e 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -14,6 +14,7 @@
 #define pr_fmt(fmt) "AGP: " fmt
 
 #include <linux/kernel.h>
+#include <linux/kcore.h>
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/memblock.h>
@@ -57,7 +58,7 @@ int fallback_aper_force __initdata;
 
 int fix_aperture __initdata = 1;
 
-#ifdef CONFIG_PROC_VMCORE
+#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE)
 /*
  * If the first kernel maps the aperture over e820 RAM, the kdump kernel will
  * use the same range because it will remain configured in the northbridge.
@@ -66,20 +67,25 @@ int fix_aperture __initdata = 1;
  */
 static unsigned long aperture_pfn_start, aperture_page_count;
 
-static int gart_oldmem_pfn_is_ram(unsigned long pfn)
+static int gart_mem_pfn_is_ram(unsigned long pfn)
 {
 	return likely((pfn < aperture_pfn_start) ||
 		      (pfn >= aperture_pfn_start + aperture_page_count));
 }
 
-static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
+static void __init exclude_from_core(u64 aper_base, u32 aper_order)
 {
 	aperture_pfn_start = aper_base >> PAGE_SHIFT;
 	aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
-	WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram));
+#ifdef CONFIG_PROC_VMCORE
+	WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
+#endif
+#ifdef CONFIG_PROC_KCORE
+	WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
+#endif
 }
 #else
-static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
+static void exclude_from_core(u64 aper_base, u32 aper_order)
 {
 }
 #endif
@@ -474,7 +480,7 @@ out:
 			 * may have allocated the range over its e820 RAM
 			 * and fixed up the northbridge
 			 */
-			exclude_from_vmcore(last_aper_base, last_aper_order);
+			exclude_from_core(last_aper_base, last_aper_order);
 
 			return 1;
 		}
@@ -520,7 +526,7 @@ out:
 	 * overlap with the first kernel's memory. We can't access the
 	 * range through vmcore even though it should be part of the dump.
 	 */
-	exclude_from_vmcore(aper_alloc, aper_order);
+	exclude_from_core(aper_alloc, aper_order);
 
 	/* Fix up the north bridges */
 	for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index bbcc185062bb..d29d869abec1 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -54,6 +54,28 @@ static LIST_HEAD(kclist_head);
 static DECLARE_RWSEM(kclist_lock);
 static int kcore_need_update = 1;
 
+/*
+ * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
+ * Same as oldmem_pfn_is_ram in vmcore
+ */
+static int (*mem_pfn_is_ram)(unsigned long pfn);
+
+int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn))
+{
+	if (mem_pfn_is_ram)
+		return -EBUSY;
+	mem_pfn_is_ram = fn;
+	return 0;
+}
+
+static int pfn_is_ram(unsigned long pfn)
+{
+	if (mem_pfn_is_ram)
+		return mem_pfn_is_ram(pfn);
+	else
+		return 1;
+}
+
 /* This doesn't grab kclist_lock, so it should only be used at init time. */
 void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
 		       int type)
@@ -465,6 +487,11 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 				goto out;
 			}
 			m = NULL;	/* skip the list anchor */
+		} else if (!pfn_is_ram(__pa(start) >> PAGE_SHIFT)) {
+			if (clear_user(buffer, tsz)) {
+				ret = -EFAULT;
+				goto out;
+			}
 		} else if (m->type == KCORE_VMALLOC) {
 			vread(buf, (char *)start, tsz);
 			/* we have to zero-fill user buffer even if no read */
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 8c3f8c14eeaa..c843f4a9c512 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -44,6 +44,8 @@ void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
 	m->vaddr = (unsigned long)vaddr;
 	kclist_add(m, addr, sz, KCORE_REMAP);
 }
+
+extern int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn));
 #else
 static inline
 void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-03-23 11:16 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-08  3:05 [PATCH v5] x86/gart/kcore: Exclude GART aperture from kcore Kairui Song
2019-03-08 12:06 ` Jiri Bohac
2019-03-11  0:02 ` Baoquan He
2019-03-22  2:02 ` Kairui Song
2019-03-23 11:16 ` [tip:x86/urgent] x86/gart: " tip-bot for Kairui Song

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.