All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-08  3:32 Zheng, Shaohui
  2010-01-08  5:02   ` H. Peter Anvin
  2010-01-08 12:48   ` Wu Fengguang
  0 siblings, 2 replies; 39+ messages in thread
From: Zheng, Shaohui @ 2010-01-08  3:32 UTC (permalink / raw)
  To: linux-mm, akpm; +Cc: linux-kernel, ak, y-goto, Dave Hansen, Wu, Fengguang, x86

[-- Attachment #1: Type: text/plain, Size: 3951 bytes --]

Resend the patch to the mailing-list, the original patch URL is 
http://patchwork.kernel.org/patch/69075/, it is not accepted without comments,
sent it again to review.

Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel

The new added memory can not be access by interface /dev/mem, because we do not
 update the variable high_memory. This patch add a new e820 entry in e820 table,
 and update max_pfn, max_low_pfn and high_memory.

We add a function update_pfn in file arch/x86/mm/init.c to udpate these
 varibles. Memory hotplug does not make sense on 32-bit kernel, so we did not
 concern it in this function.

Signed-off-by: Shaohui Zheng <shaohui.zheng@intel.com>
CC: Andi Kleen <ak@linux.intel.com>
CC: Wu Fengguang <fengguang.wu@intel.com>
CC: Li Haicheng <Haicheng.li@intel.com>

---
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index f50447d..b986246 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -110,8 +110,8 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
 /*
  * Add a memory region to the kernel e820 map.
  */
-static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
-					 int type)
+static void __meminit __e820_add_region(struct e820map *e820x, u64 start,
+					 u64 size, int type)
 {
 	int x = e820x->nr_map;
 
@@ -126,7 +126,7 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
 	e820x->nr_map++;
 }
 
-void __init e820_add_region(u64 start, u64 size, int type)
+void __meminit e820_add_region(u64 start, u64 size, int type)
 {
 	__e820_add_region(&e820, start, size, type);
 }
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d406c52..0474459 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,6 +1,7 @@
 #include <linux/initrd.h>
 #include <linux/ioport.h>
 #include <linux/swap.h>
+#include <linux/bootmem.h>
 
 #include <asm/cacheflush.h>
 #include <asm/e820.h>
@@ -386,3 +387,30 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 	free_init_pages("initrd memory", start, end);
 }
 #endif
+
+/**
+ * After memory hotplug, the variable max_pfn, max_low_pfn and high_memory will
+ * be affected, it will be updated in this function. Memory hotplug does not
+ * make sense on 32-bit kernel, so we do did not concern it in this function.
+ */
+void __meminit __attribute__((weak)) update_pfn(u64 start, u64 size)
+{
+#ifdef CONFIG_X86_64
+	unsigned long limit_low_pfn = 1UL<<(32 - PAGE_SHIFT);
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long end_pfn = (start + size) >> PAGE_SHIFT;
+
+	if (end_pfn > max_pfn) {
+		max_pfn = end_pfn;
+		high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+	}
+
+	/* if add to low memory, update max_low_pfn */
+	if (unlikely(start_pfn < limit_low_pfn)) {
+		if (end_pfn <= limit_low_pfn)
+			max_low_pfn = end_pfn;
+		else
+			max_low_pfn = limit_low_pfn;
+	}
+#endif /* CONFIG_X86_64 */
+}
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index b10ec49..6693414 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -13,6 +13,7 @@
 
 extern unsigned long max_low_pfn;
 extern unsigned long min_low_pfn;
+extern void update_pfn(u64 start, u64 size);
 
 /*
  * highest page
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 030ce8a..ee7b2d6 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -523,6 +523,14 @@ int __ref add_memory(int nid, u64 start, u64 size)
 		BUG_ON(ret);
 	}
 
+	/* update e820 table */
+	printk(KERN_INFO "Adding memory region to e820 table (start:%016Lx, size:%016Lx).\n",
+			 (unsigned long long)start, (unsigned long long)size);
+	e820_add_region(start, size, E820_RAM);
+
+	/* update max_pfn, max_low_pfn and high_memory */
+	update_pfn(start, size);
+
 	goto out;
 
 error:

Thanks & Regards,
Shaohui




[-- Attachment #2: memory-hotplug-Fix-the-bug-on-interface-dev-mem-v1.patch --]
[-- Type: application/octet-stream, Size: 3508 bytes --]

Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel

The new added memory can not be access by interface /dev/mem, because we do not
 update the variable high_memory. This patch add a new e820 entry in e820 table,
 and update max_pfn, max_low_pfn and high_memory.

We add a function update_pfn in file arch/x86/mm/init.c to udpate these
 varibles. Memory hotplug does not make sense on 32-bit kernel, so we did not
 concern it in this function.

Signed-off-by: Shaohui Zheng <shaohui.zheng@intel.com>
---
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index f50447d..b986246 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -110,8 +110,8 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
 /*
  * Add a memory region to the kernel e820 map.
  */
-static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
-					 int type)
+static void __meminit __e820_add_region(struct e820map *e820x, u64 start,
+					 u64 size, int type)
 {
 	int x = e820x->nr_map;
 
@@ -126,7 +126,7 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
 	e820x->nr_map++;
 }
 
-void __init e820_add_region(u64 start, u64 size, int type)
+void __meminit e820_add_region(u64 start, u64 size, int type)
 {
 	__e820_add_region(&e820, start, size, type);
 }
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d406c52..0474459 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,6 +1,7 @@
 #include <linux/initrd.h>
 #include <linux/ioport.h>
 #include <linux/swap.h>
+#include <linux/bootmem.h>
 
 #include <asm/cacheflush.h>
 #include <asm/e820.h>
@@ -386,3 +387,30 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 	free_init_pages("initrd memory", start, end);
 }
 #endif
+
+/**
+ * After memory hotplug, the variable max_pfn, max_low_pfn and high_memory will
+ * be affected, it will be updated in this function. Memory hotplug does not
+ * make sense on 32-bit kernel, so we do did not concern it in this function.
+ */
+void __meminit __attribute__((weak)) update_pfn(u64 start, u64 size)
+{
+#ifdef CONFIG_X86_64
+	unsigned long limit_low_pfn = 1UL<<(32 - PAGE_SHIFT);
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long end_pfn = (start + size) >> PAGE_SHIFT;
+
+	if (end_pfn > max_pfn) {
+		max_pfn = end_pfn;
+		high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+	}
+
+	/* if add to low memory, update max_low_pfn */
+	if (unlikely(start_pfn < limit_low_pfn)) {
+		if (end_pfn <= limit_low_pfn)
+			max_low_pfn = end_pfn;
+		else
+			max_low_pfn = limit_low_pfn;
+	}
+#endif /* CONFIG_X86_64 */
+}
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index b10ec49..6693414 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -13,6 +13,7 @@
 
 extern unsigned long max_low_pfn;
 extern unsigned long min_low_pfn;
+extern void update_pfn(u64 start, u64 size);
 
 /*
  * highest page
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 030ce8a..ee7b2d6 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -523,6 +523,14 @@ int __ref add_memory(int nid, u64 start, u64 size)
 		BUG_ON(ret);
 	}
 
+	/* update e820 table */
+	printk(KERN_INFO "Adding memory region to e820 table (start:%016Lx, size:%016Lx).\n",
+			 (unsigned long long)start, (unsigned long long)size);
+	e820_add_region(start, size, E820_RAM);
+
+	/* update max_pfn, max_low_pfn and high_memory */
+	update_pfn(start, size);
+
 	goto out;
 
 error:

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-08  3:32 [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1) Zheng, Shaohui
@ 2010-01-08  5:02   ` H. Peter Anvin
  2010-01-08 12:48   ` Wu Fengguang
  1 sibling, 0 replies; 39+ messages in thread
From: H. Peter Anvin @ 2010-01-08  5:02 UTC (permalink / raw)
  To: Zheng, Shaohui
  Cc: linux-mm, akpm, linux-kernel, ak, y-goto, Dave Hansen, Wu,
	Fengguang, x86

On 01/07/2010 07:32 PM, Zheng, Shaohui wrote:
> Resend the patch to the mailing-list, the original patch URL is 
> http://patchwork.kernel.org/patch/69075/, it is not accepted without comments,
> sent it again to review.
> 
> Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel
> 
> The new added memory can not be access by interface /dev/mem, because we do not
>  update the variable high_memory. This patch add a new e820 entry in e820 table,
>  and update max_pfn, max_low_pfn and high_memory.
> 
> We add a function update_pfn in file arch/x86/mm/init.c to udpate these
>  varibles. Memory hotplug does not make sense on 32-bit kernel, so we did not
>  concern it in this function.
> 

Memory hotplug makes sense on 32-bit kernels, at least in virtual
environments.

	-hpa

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-08  5:02   ` H. Peter Anvin
  0 siblings, 0 replies; 39+ messages in thread
From: H. Peter Anvin @ 2010-01-08  5:02 UTC (permalink / raw)
  To: Zheng, Shaohui
  Cc: linux-mm, akpm, linux-kernel, ak, y-goto, Dave Hansen, Wu,
	Fengguang, x86

On 01/07/2010 07:32 PM, Zheng, Shaohui wrote:
> Resend the patch to the mailing-list, the original patch URL is 
> http://patchwork.kernel.org/patch/69075/, it is not accepted without comments,
> sent it again to review.
> 
> Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel
> 
> The new added memory can not be access by interface /dev/mem, because we do not
>  update the variable high_memory. This patch add a new e820 entry in e820 table,
>  and update max_pfn, max_low_pfn and high_memory.
> 
> We add a function update_pfn in file arch/x86/mm/init.c to udpate these
>  varibles. Memory hotplug does not make sense on 32-bit kernel, so we did not
>  concern it in this function.
> 

Memory hotplug makes sense on 32-bit kernels, at least in virtual
environments.

	-hpa

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-08  5:02   ` H. Peter Anvin
@ 2010-01-08  5:18     ` Zheng, Shaohui
  -1 siblings, 0 replies; 39+ messages in thread
From: Zheng, Shaohui @ 2010-01-08  5:18 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: linux-mm, akpm, linux-kernel, ak, y-goto, Dave Hansen, Wu,
	Fengguang, x86

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 1712 bytes --]

Thanks Peter, my testing shows that there are many issues on 32-bit kernel for memory hot-add, there are still more works to do for 32-bit kernels(more than 2 patches). Memory hot-add is much more important on 64-bit kernel, I think that we can fix the bug on 64-bit kernel first. 32-kernel hotplug is the working in next step.

Thanks & Regards,
Shaohui


-----Original Message-----
From: H. Peter Anvin [mailto:hpa@kernel.org] 
Sent: Friday, January 08, 2010 1:03 PM
To: Zheng, Shaohui
Cc: linux-mm@kvack.org; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; ak@linux.intel.com; y-goto@jp.fujitsu.com; Dave Hansen; Wu, Fengguang; x86@kernel.org
Subject: Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)

On 01/07/2010 07:32 PM, Zheng, Shaohui wrote:
> Resend the patch to the mailing-list, the original patch URL is 
> http://patchwork.kernel.org/patch/69075/, it is not accepted without comments,
> sent it again to review.
> 
> Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel
> 
> The new added memory can not be access by interface /dev/mem, because we do not
>  update the variable high_memory. This patch add a new e820 entry in e820 table,
>  and update max_pfn, max_low_pfn and high_memory.
> 
> We add a function update_pfn in file arch/x86/mm/init.c to udpate these
>  varibles. Memory hotplug does not make sense on 32-bit kernel, so we did not
>  concern it in this function.
> 

Memory hotplug makes sense on 32-bit kernels, at least in virtual
environments.

	-hpa
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-08  5:18     ` Zheng, Shaohui
  0 siblings, 0 replies; 39+ messages in thread
From: Zheng, Shaohui @ 2010-01-08  5:18 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: linux-mm, akpm, linux-kernel, ak, y-goto, Dave Hansen, Wu,
	Fengguang, x86

Thanks Peter, my testing shows that there are many issues on 32-bit kernel for memory hot-add, there are still more works to do for 32-bit kernels(more than 2 patches). Memory hot-add is much more important on 64-bit kernel, I think that we can fix the bug on 64-bit kernel first. 32-kernel hotplug is the working in next step.

Thanks & Regards,
Shaohui


-----Original Message-----
From: H. Peter Anvin [mailto:hpa@kernel.org] 
Sent: Friday, January 08, 2010 1:03 PM
To: Zheng, Shaohui
Cc: linux-mm@kvack.org; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; ak@linux.intel.com; y-goto@jp.fujitsu.com; Dave Hansen; Wu, Fengguang; x86@kernel.org
Subject: Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)

On 01/07/2010 07:32 PM, Zheng, Shaohui wrote:
> Resend the patch to the mailing-list, the original patch URL is 
> http://patchwork.kernel.org/patch/69075/, it is not accepted without comments,
> sent it again to review.
> 
> Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel
> 
> The new added memory can not be access by interface /dev/mem, because we do not
>  update the variable high_memory. This patch add a new e820 entry in e820 table,
>  and update max_pfn, max_low_pfn and high_memory.
> 
> We add a function update_pfn in file arch/x86/mm/init.c to udpate these
>  varibles. Memory hotplug does not make sense on 32-bit kernel, so we did not
>  concern it in this function.
> 

Memory hotplug makes sense on 32-bit kernels, at least in virtual
environments.

	-hpa

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-08  3:32 [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1) Zheng, Shaohui
@ 2010-01-08 12:48   ` Wu Fengguang
  2010-01-08 12:48   ` Wu Fengguang
  1 sibling, 0 replies; 39+ messages in thread
From: Wu Fengguang @ 2010-01-08 12:48 UTC (permalink / raw)
  To: Zheng, Shaohui
  Cc: linux-mm, akpm, linux-kernel, ak, y-goto, Dave Hansen, x86,
	KAMEZAWA Hiroyuki

On Fri, Jan 08, 2010 at 11:32:07AM +0800, Zheng, Shaohui wrote:
> Resend the patch to the mailing-list, the original patch URL is 
> http://patchwork.kernel.org/patch/69075/, it is not accepted without comments,
> sent it again to review.
> 
> Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel
> 
> The new added memory can not be access by interface /dev/mem, because we do not
>  update the variable high_memory. This patch add a new e820 entry in e820 table,
>  and update max_pfn, max_low_pfn and high_memory.
> 
> We add a function update_pfn in file arch/x86/mm/init.c to udpate these
>  varibles. Memory hotplug does not make sense on 32-bit kernel, so we did not
>  concern it in this function.
> 
> Signed-off-by: Shaohui Zheng <shaohui.zheng@intel.com>
> CC: Andi Kleen <ak@linux.intel.com>
> CC: Wu Fengguang <fengguang.wu@intel.com>
> CC: Li Haicheng <Haicheng.li@intel.com>
> 
> ---
> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> index f50447d..b986246 100644
> --- a/arch/x86/kernel/e820.c
> +++ b/arch/x86/kernel/e820.c
> @@ -110,8 +110,8 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
>  /*
>   * Add a memory region to the kernel e820 map.
>   */
> -static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
> -					 int type)
> +static void __meminit __e820_add_region(struct e820map *e820x, u64 start,
> +					 u64 size, int type)
>  {
>  	int x = e820x->nr_map;
>  
> @@ -126,7 +126,7 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
>  	e820x->nr_map++;
>  }
>  
> -void __init e820_add_region(u64 start, u64 size, int type)
> +void __meminit e820_add_region(u64 start, u64 size, int type)
>  {
>  	__e820_add_region(&e820, start, size, type);
>  }
> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
> index d406c52..0474459 100644
> --- a/arch/x86/mm/init.c
> +++ b/arch/x86/mm/init.c
> @@ -1,6 +1,7 @@
>  #include <linux/initrd.h>
>  #include <linux/ioport.h>
>  #include <linux/swap.h>
> +#include <linux/bootmem.h>
>  
>  #include <asm/cacheflush.h>
>  #include <asm/e820.h>
> @@ -386,3 +387,30 @@ void free_initrd_mem(unsigned long start, unsigned long end)
>  	free_init_pages("initrd memory", start, end);
>  }
>  #endif
> +
> +/**
> + * After memory hotplug, the variable max_pfn, max_low_pfn and high_memory will
> + * be affected, it will be updated in this function. Memory hotplug does not
> + * make sense on 32-bit kernel, so we do did not concern it in this function.
> + */
> +void __meminit __attribute__((weak)) update_pfn(u64 start, u64 size)
> +{
> +#ifdef CONFIG_X86_64
> +	unsigned long limit_low_pfn = 1UL<<(32 - PAGE_SHIFT);
> +	unsigned long start_pfn = start >> PAGE_SHIFT;
> +	unsigned long end_pfn = (start + size) >> PAGE_SHIFT;

Strictly speaking, should use "end_pfn = PFN_UP(start + size);".

> +	if (end_pfn > max_pfn) {
> +		max_pfn = end_pfn;
> +		high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
> +	}
> +
> +	/* if add to low memory, update max_low_pfn */
> +	if (unlikely(start_pfn < limit_low_pfn)) {
> +		if (end_pfn <= limit_low_pfn)
> +			max_low_pfn = end_pfn;
> +		else
> +			max_low_pfn = limit_low_pfn;

X86_64 actually always set max_low_pfn=max_pfn, in setup_arch():

 899 #ifdef CONFIG_X86_64
 900         if (max_pfn > max_low_pfn) {
 901                 max_pfn_mapped = init_memory_mapping(1UL<<32,
 902                                                      max_pfn<<PAGE_SHIFT);
 903                 /* can we preseve max_low_pfn ?*/
 904                 max_low_pfn = max_pfn;
 905         }
 906 #endif

max_low_pfn is used in

- e820_mark_nosave_regions(max_low_pfn);
- dump_pagetable()
- blk_queue_bounce_limit()
- increase_reservation()

and _seems_ to mean "end of direct addressable pfn".

> +	}
> +#endif /* CONFIG_X86_64 */
> +}
> diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
> index b10ec49..6693414 100644
> --- a/include/linux/bootmem.h
> +++ b/include/linux/bootmem.h
> @@ -13,6 +13,7 @@
>  
>  extern unsigned long max_low_pfn;
>  extern unsigned long min_low_pfn;
> +extern void update_pfn(u64 start, u64 size);
>  
>  /*
>   * highest page
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 030ce8a..ee7b2d6 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -523,6 +523,14 @@ int __ref add_memory(int nid, u64 start, u64 size)
>  		BUG_ON(ret);
>  	}
>  
> +	/* update e820 table */

This comment can be eliminated - you already have the very readable printk :)

> +	printk(KERN_INFO "Adding memory region to e820 table (start:%016Lx, size:%016Lx).\n",
> +			 (unsigned long long)start, (unsigned long long)size);
> +	e820_add_region(start, size, E820_RAM);

> +	/* update max_pfn, max_low_pfn and high_memory */
> +	update_pfn(start, size);

How about renaming function to update_end_of_memory_vars()?

Thanks,
Fengguang

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-08 12:48   ` Wu Fengguang
  0 siblings, 0 replies; 39+ messages in thread
From: Wu Fengguang @ 2010-01-08 12:48 UTC (permalink / raw)
  To: Zheng, Shaohui
  Cc: linux-mm, akpm, linux-kernel, ak, y-goto, Dave Hansen, x86,
	KAMEZAWA Hiroyuki

On Fri, Jan 08, 2010 at 11:32:07AM +0800, Zheng, Shaohui wrote:
> Resend the patch to the mailing-list, the original patch URL is 
> http://patchwork.kernel.org/patch/69075/, it is not accepted without comments,
> sent it again to review.
> 
> Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel
> 
> The new added memory can not be access by interface /dev/mem, because we do not
>  update the variable high_memory. This patch add a new e820 entry in e820 table,
>  and update max_pfn, max_low_pfn and high_memory.
> 
> We add a function update_pfn in file arch/x86/mm/init.c to udpate these
>  varibles. Memory hotplug does not make sense on 32-bit kernel, so we did not
>  concern it in this function.
> 
> Signed-off-by: Shaohui Zheng <shaohui.zheng@intel.com>
> CC: Andi Kleen <ak@linux.intel.com>
> CC: Wu Fengguang <fengguang.wu@intel.com>
> CC: Li Haicheng <Haicheng.li@intel.com>
> 
> ---
> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> index f50447d..b986246 100644
> --- a/arch/x86/kernel/e820.c
> +++ b/arch/x86/kernel/e820.c
> @@ -110,8 +110,8 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
>  /*
>   * Add a memory region to the kernel e820 map.
>   */
> -static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
> -					 int type)
> +static void __meminit __e820_add_region(struct e820map *e820x, u64 start,
> +					 u64 size, int type)
>  {
>  	int x = e820x->nr_map;
>  
> @@ -126,7 +126,7 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
>  	e820x->nr_map++;
>  }
>  
> -void __init e820_add_region(u64 start, u64 size, int type)
> +void __meminit e820_add_region(u64 start, u64 size, int type)
>  {
>  	__e820_add_region(&e820, start, size, type);
>  }
> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
> index d406c52..0474459 100644
> --- a/arch/x86/mm/init.c
> +++ b/arch/x86/mm/init.c
> @@ -1,6 +1,7 @@
>  #include <linux/initrd.h>
>  #include <linux/ioport.h>
>  #include <linux/swap.h>
> +#include <linux/bootmem.h>
>  
>  #include <asm/cacheflush.h>
>  #include <asm/e820.h>
> @@ -386,3 +387,30 @@ void free_initrd_mem(unsigned long start, unsigned long end)
>  	free_init_pages("initrd memory", start, end);
>  }
>  #endif
> +
> +/**
> + * After memory hotplug, the variable max_pfn, max_low_pfn and high_memory will
> + * be affected, it will be updated in this function. Memory hotplug does not
> + * make sense on 32-bit kernel, so we do did not concern it in this function.
> + */
> +void __meminit __attribute__((weak)) update_pfn(u64 start, u64 size)
> +{
> +#ifdef CONFIG_X86_64
> +	unsigned long limit_low_pfn = 1UL<<(32 - PAGE_SHIFT);
> +	unsigned long start_pfn = start >> PAGE_SHIFT;
> +	unsigned long end_pfn = (start + size) >> PAGE_SHIFT;

Strictly speaking, should use "end_pfn = PFN_UP(start + size);".

> +	if (end_pfn > max_pfn) {
> +		max_pfn = end_pfn;
> +		high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
> +	}
> +
> +	/* if add to low memory, update max_low_pfn */
> +	if (unlikely(start_pfn < limit_low_pfn)) {
> +		if (end_pfn <= limit_low_pfn)
> +			max_low_pfn = end_pfn;
> +		else
> +			max_low_pfn = limit_low_pfn;

X86_64 actually always set max_low_pfn=max_pfn, in setup_arch():

 899 #ifdef CONFIG_X86_64
 900         if (max_pfn > max_low_pfn) {
 901                 max_pfn_mapped = init_memory_mapping(1UL<<32,
 902                                                      max_pfn<<PAGE_SHIFT);
 903                 /* can we preseve max_low_pfn ?*/
 904                 max_low_pfn = max_pfn;
 905         }
 906 #endif

max_low_pfn is used in

- e820_mark_nosave_regions(max_low_pfn);
- dump_pagetable()
- blk_queue_bounce_limit()
- increase_reservation()

and _seems_ to mean "end of direct addressable pfn".

> +	}
> +#endif /* CONFIG_X86_64 */
> +}
> diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
> index b10ec49..6693414 100644
> --- a/include/linux/bootmem.h
> +++ b/include/linux/bootmem.h
> @@ -13,6 +13,7 @@
>  
>  extern unsigned long max_low_pfn;
>  extern unsigned long min_low_pfn;
> +extern void update_pfn(u64 start, u64 size);
>  
>  /*
>   * highest page
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 030ce8a..ee7b2d6 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -523,6 +523,14 @@ int __ref add_memory(int nid, u64 start, u64 size)
>  		BUG_ON(ret);
>  	}
>  
> +	/* update e820 table */

This comment can be eliminated - you already have the very readable printk :)

> +	printk(KERN_INFO "Adding memory region to e820 table (start:%016Lx, size:%016Lx).\n",
> +			 (unsigned long long)start, (unsigned long long)size);
> +	e820_add_region(start, size, E820_RAM);

> +	/* update max_pfn, max_low_pfn and high_memory */
> +	update_pfn(start, size);

How about renaming function to update_end_of_memory_vars()?

Thanks,
Fengguang

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-08  5:02   ` H. Peter Anvin
@ 2010-01-08 19:47     ` Andi Kleen
  -1 siblings, 0 replies; 39+ messages in thread
From: Andi Kleen @ 2010-01-08 19:47 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Zheng, Shaohui, linux-mm, akpm, linux-kernel, y-goto,
	Dave Hansen, Wu, Fengguang, x86

H. Peter Anvin wrote:
> On 01/07/2010 07:32 PM, Zheng, Shaohui wrote:
>> Resend the patch to the mailing-list, the original patch URL is 
>> http://patchwork.kernel.org/patch/69075/, it is not accepted without comments,
>> sent it again to review.
>>
>> Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel
>>
>> The new added memory can not be access by interface /dev/mem, because we do not
>>  update the variable high_memory. This patch add a new e820 entry in e820 table,
>>  and update max_pfn, max_low_pfn and high_memory.
>>
>> We add a function update_pfn in file arch/x86/mm/init.c to udpate these
>>  varibles. Memory hotplug does not make sense on 32-bit kernel, so we did not
>>  concern it in this function.
>>
> 
> Memory hotplug makes sense on 32-bit kernels, at least in virtual
> environments.

No VM currently supports it to my knowledge. They all use traditional
balooning.

If someone adds that they can still fix it, but right now fixing it on 64bit
is the important part.

-Andi

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-08 19:47     ` Andi Kleen
  0 siblings, 0 replies; 39+ messages in thread
From: Andi Kleen @ 2010-01-08 19:47 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Zheng, Shaohui, linux-mm, akpm, linux-kernel, y-goto,
	Dave Hansen, Wu, Fengguang, x86

H. Peter Anvin wrote:
> On 01/07/2010 07:32 PM, Zheng, Shaohui wrote:
>> Resend the patch to the mailing-list, the original patch URL is 
>> http://patchwork.kernel.org/patch/69075/, it is not accepted without comments,
>> sent it again to review.
>>
>> Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel
>>
>> The new added memory can not be access by interface /dev/mem, because we do not
>>  update the variable high_memory. This patch add a new e820 entry in e820 table,
>>  and update max_pfn, max_low_pfn and high_memory.
>>
>> We add a function update_pfn in file arch/x86/mm/init.c to udpate these
>>  varibles. Memory hotplug does not make sense on 32-bit kernel, so we did not
>>  concern it in this function.
>>
> 
> Memory hotplug makes sense on 32-bit kernels, at least in virtual
> environments.

No VM currently supports it to my knowledge. They all use traditional
balooning.

If someone adds that they can still fix it, but right now fixing it on 64bit
is the important part.

-Andi

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-08 12:48   ` Wu Fengguang
@ 2010-01-11  2:20     ` Zheng, Shaohui
  -1 siblings, 0 replies; 39+ messages in thread
From: Zheng, Shaohui @ 2010-01-11  2:20 UTC (permalink / raw)
  To: Wu, Fengguang
  Cc: linux-mm, akpm, linux-kernel, ak, y-goto, Dave Hansen, x86,
	KAMEZAWA Hiroyuki

Thanks Fengguang, see and comments in the email. Only a few different understanding on variable max_low_pfn.

Thanks & Regards,
Shaohui


-----Original Message-----
From: Wu, Fengguang 
Sent: Friday, January 08, 2010 8:49 PM
To: Zheng, Shaohui
Cc: linux-mm@kvack.org; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; ak@linux.intel.com; y-goto@jp.fujitsu.com; Dave Hansen; x86@kernel.org; KAMEZAWA Hiroyuki
Subject: Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)

On Fri, Jan 08, 2010 at 11:32:07AM +0800, Zheng, Shaohui wrote:
> Resend the patch to the mailing-list, the original patch URL is 
> http://patchwork.kernel.org/patch/69075/, it is not accepted without comments,
> sent it again to review.
> 
> Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel
> 
> The new added memory can not be access by interface /dev/mem, because we do not
>  update the variable high_memory. This patch add a new e820 entry in e820 table,
>  and update max_pfn, max_low_pfn and high_memory.
> 
> We add a function update_pfn in file arch/x86/mm/init.c to udpate these
>  varibles. Memory hotplug does not make sense on 32-bit kernel, so we did not
>  concern it in this function.
> 
> Signed-off-by: Shaohui Zheng <shaohui.zheng@intel.com>
> CC: Andi Kleen <ak@linux.intel.com>
> CC: Wu Fengguang <fengguang.wu@intel.com>
> CC: Li Haicheng <Haicheng.li@intel.com>
> 
> ---
> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> index f50447d..b986246 100644
> --- a/arch/x86/kernel/e820.c
> +++ b/arch/x86/kernel/e820.c
> @@ -110,8 +110,8 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
>  /*
>   * Add a memory region to the kernel e820 map.
>   */
> -static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
> -					 int type)
> +static void __meminit __e820_add_region(struct e820map *e820x, u64 start,
> +					 u64 size, int type)
>  {
>  	int x = e820x->nr_map;
>  
> @@ -126,7 +126,7 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
>  	e820x->nr_map++;
>  }
>  
> -void __init e820_add_region(u64 start, u64 size, int type)
> +void __meminit e820_add_region(u64 start, u64 size, int type)
>  {
>  	__e820_add_region(&e820, start, size, type);
>  }
> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
> index d406c52..0474459 100644
> --- a/arch/x86/mm/init.c
> +++ b/arch/x86/mm/init.c
> @@ -1,6 +1,7 @@
>  #include <linux/initrd.h>
>  #include <linux/ioport.h>
>  #include <linux/swap.h>
> +#include <linux/bootmem.h>
>  
>  #include <asm/cacheflush.h>
>  #include <asm/e820.h>
> @@ -386,3 +387,30 @@ void free_initrd_mem(unsigned long start, unsigned long end)
>  	free_init_pages("initrd memory", start, end);
>  }
>  #endif
> +
> +/**
> + * After memory hotplug, the variable max_pfn, max_low_pfn and high_memory will
> + * be affected, it will be updated in this function. Memory hotplug does not
> + * make sense on 32-bit kernel, so we do did not concern it in this function.
> + */
> +void __meminit __attribute__((weak)) update_pfn(u64 start, u64 size)
> +{
> +#ifdef CONFIG_X86_64
> +	unsigned long limit_low_pfn = 1UL<<(32 - PAGE_SHIFT);
> +	unsigned long start_pfn = start >> PAGE_SHIFT;
> +	unsigned long end_pfn = (start + size) >> PAGE_SHIFT;

Strictly speaking, should use "end_pfn = PFN_UP(start + size);".
[Zheng, Shaohui] I will use PFN_UP to replace it in new version.

> +	if (end_pfn > max_pfn) {
> +		max_pfn = end_pfn;
> +		high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
> +	}
> +
> +	/* if add to low memory, update max_low_pfn */
> +	if (unlikely(start_pfn < limit_low_pfn)) {
> +		if (end_pfn <= limit_low_pfn)
> +			max_low_pfn = end_pfn;
> +		else
> +			max_low_pfn = limit_low_pfn;

X86_64 actually always set max_low_pfn=max_pfn, in setup_arch():
[Zheng, Shaohui] there should be some misunderstanding, I read the code carefully, if the total memory is under 4G, it always max_low_pfn=max_pfn. If the total memory is larger than 4G, max_low_pfn means the end of low ram. It set max_low_pfn = e820_end_of_low_ram_pfn();.

 899 #ifdef CONFIG_X86_64
 900         if (max_pfn > max_low_pfn) {
 901                 max_pfn_mapped = init_memory_mapping(1UL<<32,
 902                                                      max_pfn<<PAGE_SHIFT);
 903                 /* can we preseve max_low_pfn ?*/
 904                 max_low_pfn = max_pfn;
 905         }
 906 #endif

max_low_pfn is used in

- e820_mark_nosave_regions(max_low_pfn);
- dump_pagetable()
- blk_queue_bounce_limit()
- increase_reservation()

and _seems_ to mean "end of direct addressable pfn".

> +	}
> +#endif /* CONFIG_X86_64 */
> +}
> diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
> index b10ec49..6693414 100644
> --- a/include/linux/bootmem.h
> +++ b/include/linux/bootmem.h
> @@ -13,6 +13,7 @@
>  
>  extern unsigned long max_low_pfn;
>  extern unsigned long min_low_pfn;
> +extern void update_pfn(u64 start, u64 size);
>  
>  /*
>   * highest page
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 030ce8a..ee7b2d6 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -523,6 +523,14 @@ int __ref add_memory(int nid, u64 start, u64 size)
>  		BUG_ON(ret);
>  	}
>  
> +	/* update e820 table */

This comment can be eliminated - you already have the very readable printk :)
[Zheng, Shaohui] I will remove this comment

> +	printk(KERN_INFO "Adding memory region to e820 table (start:%016Lx, size:%016Lx).\n",
> +			 (unsigned long long)start, (unsigned long long)size);
> +	e820_add_region(start, size, E820_RAM);

> +	/* update max_pfn, max_low_pfn and high_memory */
> +	update_pfn(start, size);

How about renaming function to update_end_of_memory_vars()?
[Zheng, Shaohui] Agree.

Thanks,
Fengguang

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-11  2:20     ` Zheng, Shaohui
  0 siblings, 0 replies; 39+ messages in thread
From: Zheng, Shaohui @ 2010-01-11  2:20 UTC (permalink / raw)
  To: Wu, Fengguang
  Cc: linux-mm, akpm, linux-kernel, ak, y-goto, Dave Hansen, x86,
	KAMEZAWA Hiroyuki

Thanks Fengguang, see and comments in the email. Only a few different understanding on variable max_low_pfn.

Thanks & Regards,
Shaohui


-----Original Message-----
From: Wu, Fengguang 
Sent: Friday, January 08, 2010 8:49 PM
To: Zheng, Shaohui
Cc: linux-mm@kvack.org; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; ak@linux.intel.com; y-goto@jp.fujitsu.com; Dave Hansen; x86@kernel.org; KAMEZAWA Hiroyuki
Subject: Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)

On Fri, Jan 08, 2010 at 11:32:07AM +0800, Zheng, Shaohui wrote:
> Resend the patch to the mailing-list, the original patch URL is 
> http://patchwork.kernel.org/patch/69075/, it is not accepted without comments,
> sent it again to review.
> 
> Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel
> 
> The new added memory can not be access by interface /dev/mem, because we do not
>  update the variable high_memory. This patch add a new e820 entry in e820 table,
>  and update max_pfn, max_low_pfn and high_memory.
> 
> We add a function update_pfn in file arch/x86/mm/init.c to udpate these
>  varibles. Memory hotplug does not make sense on 32-bit kernel, so we did not
>  concern it in this function.
> 
> Signed-off-by: Shaohui Zheng <shaohui.zheng@intel.com>
> CC: Andi Kleen <ak@linux.intel.com>
> CC: Wu Fengguang <fengguang.wu@intel.com>
> CC: Li Haicheng <Haicheng.li@intel.com>
> 
> ---
> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> index f50447d..b986246 100644
> --- a/arch/x86/kernel/e820.c
> +++ b/arch/x86/kernel/e820.c
> @@ -110,8 +110,8 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
>  /*
>   * Add a memory region to the kernel e820 map.
>   */
> -static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
> -					 int type)
> +static void __meminit __e820_add_region(struct e820map *e820x, u64 start,
> +					 u64 size, int type)
>  {
>  	int x = e820x->nr_map;
>  
> @@ -126,7 +126,7 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
>  	e820x->nr_map++;
>  }
>  
> -void __init e820_add_region(u64 start, u64 size, int type)
> +void __meminit e820_add_region(u64 start, u64 size, int type)
>  {
>  	__e820_add_region(&e820, start, size, type);
>  }
> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
> index d406c52..0474459 100644
> --- a/arch/x86/mm/init.c
> +++ b/arch/x86/mm/init.c
> @@ -1,6 +1,7 @@
>  #include <linux/initrd.h>
>  #include <linux/ioport.h>
>  #include <linux/swap.h>
> +#include <linux/bootmem.h>
>  
>  #include <asm/cacheflush.h>
>  #include <asm/e820.h>
> @@ -386,3 +387,30 @@ void free_initrd_mem(unsigned long start, unsigned long end)
>  	free_init_pages("initrd memory", start, end);
>  }
>  #endif
> +
> +/**
> + * After memory hotplug, the variable max_pfn, max_low_pfn and high_memory will
> + * be affected, it will be updated in this function. Memory hotplug does not
> + * make sense on 32-bit kernel, so we do did not concern it in this function.
> + */
> +void __meminit __attribute__((weak)) update_pfn(u64 start, u64 size)
> +{
> +#ifdef CONFIG_X86_64
> +	unsigned long limit_low_pfn = 1UL<<(32 - PAGE_SHIFT);
> +	unsigned long start_pfn = start >> PAGE_SHIFT;
> +	unsigned long end_pfn = (start + size) >> PAGE_SHIFT;

Strictly speaking, should use "end_pfn = PFN_UP(start + size);".
[Zheng, Shaohui] I will use PFN_UP to replace it in new version.

> +	if (end_pfn > max_pfn) {
> +		max_pfn = end_pfn;
> +		high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
> +	}
> +
> +	/* if add to low memory, update max_low_pfn */
> +	if (unlikely(start_pfn < limit_low_pfn)) {
> +		if (end_pfn <= limit_low_pfn)
> +			max_low_pfn = end_pfn;
> +		else
> +			max_low_pfn = limit_low_pfn;

X86_64 actually always set max_low_pfn=max_pfn, in setup_arch():
[Zheng, Shaohui] there should be some misunderstanding, I read the code carefully, if the total memory is under 4G, it always max_low_pfn=max_pfn. If the total memory is larger than 4G, max_low_pfn means the end of low ram. It set max_low_pfn = e820_end_of_low_ram_pfn();.

 899 #ifdef CONFIG_X86_64
 900         if (max_pfn > max_low_pfn) {
 901                 max_pfn_mapped = init_memory_mapping(1UL<<32,
 902                                                      max_pfn<<PAGE_SHIFT);
 903                 /* can we preseve max_low_pfn ?*/
 904                 max_low_pfn = max_pfn;
 905         }
 906 #endif

max_low_pfn is used in

- e820_mark_nosave_regions(max_low_pfn);
- dump_pagetable()
- blk_queue_bounce_limit()
- increase_reservation()

and _seems_ to mean "end of direct addressable pfn".

> +	}
> +#endif /* CONFIG_X86_64 */
> +}
> diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
> index b10ec49..6693414 100644
> --- a/include/linux/bootmem.h
> +++ b/include/linux/bootmem.h
> @@ -13,6 +13,7 @@
>  
>  extern unsigned long max_low_pfn;
>  extern unsigned long min_low_pfn;
> +extern void update_pfn(u64 start, u64 size);
>  
>  /*
>   * highest page
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 030ce8a..ee7b2d6 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -523,6 +523,14 @@ int __ref add_memory(int nid, u64 start, u64 size)
>  		BUG_ON(ret);
>  	}
>  
> +	/* update e820 table */

This comment can be eliminated - you already have the very readable printk :)
[Zheng, Shaohui] I will remove this comment

> +	printk(KERN_INFO "Adding memory region to e820 table (start:%016Lx, size:%016Lx).\n",
> +			 (unsigned long long)start, (unsigned long long)size);
> +	e820_add_region(start, size, E820_RAM);

> +	/* update max_pfn, max_low_pfn and high_memory */
> +	update_pfn(start, size);

How about renaming function to update_end_of_memory_vars()?
[Zheng, Shaohui] Agree.

Thanks,
Fengguang

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-11  2:20     ` Zheng, Shaohui
@ 2010-01-11 12:43       ` Wu Fengguang
  -1 siblings, 0 replies; 39+ messages in thread
From: Wu Fengguang @ 2010-01-11 12:43 UTC (permalink / raw)
  To: Zheng, Shaohui
  Cc: linux-mm, akpm, linux-kernel, ak, y-goto, Dave Hansen, x86,
	KAMEZAWA Hiroyuki

> > +	/* if add to low memory, update max_low_pfn */
> > +	if (unlikely(start_pfn < limit_low_pfn)) {
> > +		if (end_pfn <= limit_low_pfn)
> > +			max_low_pfn = end_pfn;
> > +		else
> > +			max_low_pfn = limit_low_pfn;
> 
> X86_64 actually always set max_low_pfn=max_pfn, in setup_arch():
> [Zheng, Shaohui] there should be some misunderstanding, I read the
> code carefully, if the total memory is under 4G, it always
> max_low_pfn=max_pfn. If the total memory is larger than 4G,
> max_low_pfn means the end of low ram. It set

> max_low_pfn = e820_end_of_low_ram_pfn();.

The above line is very misleading.. In setup_arch(), it will be
overrode by the following block.

>  899 #ifdef CONFIG_X86_64
>  900         if (max_pfn > max_low_pfn) {
>  901                 max_pfn_mapped = init_memory_mapping(1UL<<32,
>  902                                                      max_pfn<<PAGE_SHIFT);
>  903                 /* can we preseve max_low_pfn ?*/
>  904                 max_low_pfn = max_pfn;
>  905         }
>  906 #endif
 
Thanks,
Fengguang

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-11 12:43       ` Wu Fengguang
  0 siblings, 0 replies; 39+ messages in thread
From: Wu Fengguang @ 2010-01-11 12:43 UTC (permalink / raw)
  To: Zheng, Shaohui
  Cc: linux-mm, akpm, linux-kernel, ak, y-goto, Dave Hansen, x86,
	KAMEZAWA Hiroyuki

> > +	/* if add to low memory, update max_low_pfn */
> > +	if (unlikely(start_pfn < limit_low_pfn)) {
> > +		if (end_pfn <= limit_low_pfn)
> > +			max_low_pfn = end_pfn;
> > +		else
> > +			max_low_pfn = limit_low_pfn;
> 
> X86_64 actually always set max_low_pfn=max_pfn, in setup_arch():
> [Zheng, Shaohui] there should be some misunderstanding, I read the
> code carefully, if the total memory is under 4G, it always
> max_low_pfn=max_pfn. If the total memory is larger than 4G,
> max_low_pfn means the end of low ram. It set

> max_low_pfn = e820_end_of_low_ram_pfn();.

The above line is very misleading.. In setup_arch(), it will be
overrode by the following block.

>  899 #ifdef CONFIG_X86_64
>  900         if (max_pfn > max_low_pfn) {
>  901                 max_pfn_mapped = init_memory_mapping(1UL<<32,
>  902                                                      max_pfn<<PAGE_SHIFT);
>  903                 /* can we preseve max_low_pfn ?*/
>  904                 max_low_pfn = max_pfn;
>  905         }
>  906 #endif
 
Thanks,
Fengguang

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-11 12:43       ` Wu Fengguang
@ 2010-01-12  0:30         ` KAMEZAWA Hiroyuki
  -1 siblings, 0 replies; 39+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-01-12  0:30 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Zheng, Shaohui, linux-mm, akpm, linux-kernel, ak, y-goto,
	Dave Hansen, x86

On Mon, 11 Jan 2010 20:43:03 +0800
Wu Fengguang <fengguang.wu@intel.com> wrote:

> > > +	/* if add to low memory, update max_low_pfn */
> > > +	if (unlikely(start_pfn < limit_low_pfn)) {
> > > +		if (end_pfn <= limit_low_pfn)
> > > +			max_low_pfn = end_pfn;
> > > +		else
> > > +			max_low_pfn = limit_low_pfn;
> > 
> > X86_64 actually always set max_low_pfn=max_pfn, in setup_arch():
> > [Zheng, Shaohui] there should be some misunderstanding, I read the
> > code carefully, if the total memory is under 4G, it always
> > max_low_pfn=max_pfn. If the total memory is larger than 4G,
> > max_low_pfn means the end of low ram. It set
> 
> > max_low_pfn = e820_end_of_low_ram_pfn();.
> 
> The above line is very misleading.. In setup_arch(), it will be
> overrode by the following block.
> 

Hmmm....could you rewrite /dev/mem to use kernel/resource.c other than
modifing e820 maps. ?
Two reasons.
  - e820map is considerted to be stable, read-only after boot.
  - We don't need to add more x86 special codes.


Thanks,
-Kame


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-12  0:30         ` KAMEZAWA Hiroyuki
  0 siblings, 0 replies; 39+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-01-12  0:30 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Zheng, Shaohui, linux-mm, akpm, linux-kernel, ak, y-goto,
	Dave Hansen, x86

On Mon, 11 Jan 2010 20:43:03 +0800
Wu Fengguang <fengguang.wu@intel.com> wrote:

> > > +	/* if add to low memory, update max_low_pfn */
> > > +	if (unlikely(start_pfn < limit_low_pfn)) {
> > > +		if (end_pfn <= limit_low_pfn)
> > > +			max_low_pfn = end_pfn;
> > > +		else
> > > +			max_low_pfn = limit_low_pfn;
> > 
> > X86_64 actually always set max_low_pfn=max_pfn, in setup_arch():
> > [Zheng, Shaohui] there should be some misunderstanding, I read the
> > code carefully, if the total memory is under 4G, it always
> > max_low_pfn=max_pfn. If the total memory is larger than 4G,
> > max_low_pfn means the end of low ram. It set
> 
> > max_low_pfn = e820_end_of_low_ram_pfn();.
> 
> The above line is very misleading.. In setup_arch(), it will be
> overrode by the following block.
> 

Hmmm....could you rewrite /dev/mem to use kernel/resource.c other than
modifing e820 maps. ?
Two reasons.
  - e820map is considerted to be stable, read-only after boot.
  - We don't need to add more x86 special codes.


Thanks,
-Kame

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-08 19:47     ` Andi Kleen
@ 2010-01-12  0:58       ` KAMEZAWA Hiroyuki
  -1 siblings, 0 replies; 39+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-01-12  0:58 UTC (permalink / raw)
  To: Andi Kleen
  Cc: H. Peter Anvin, Zheng, Shaohui, linux-mm, akpm, linux-kernel,
	y-goto, Dave Hansen, Wu, Fengguang, x86

On Fri, 08 Jan 2010 20:47:54 +0100
Andi Kleen <ak@linux.intel.com> wrote:

> H. Peter Anvin wrote:
> > On 01/07/2010 07:32 PM, Zheng, Shaohui wrote:
> >> Resend the patch to the mailing-list, the original patch URL is 
> >> http://patchwork.kernel.org/patch/69075/, it is not accepted without comments,
> >> sent it again to review.
> >>
> >> Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel
> >>
> >> The new added memory can not be access by interface /dev/mem, because we do not
> >>  update the variable high_memory. This patch add a new e820 entry in e820 table,
> >>  and update max_pfn, max_low_pfn and high_memory.
> >>
> >> We add a function update_pfn in file arch/x86/mm/init.c to udpate these
> >>  varibles. Memory hotplug does not make sense on 32-bit kernel, so we did not
> >>  concern it in this function.
> >>
> > 
> > Memory hotplug makes sense on 32-bit kernels, at least in virtual
> > environments.
> 
> No VM currently supports it to my knowledge. They all use traditional
> balooning.
> 
> If someone adds that they can still fix it, but right now fixing it on 64bit
> is the important part.
> 
I wonder...with some modification, memory hotplug (or Mel's page coalescing)
can be used for balloning in MAX_ORDER page size.
I'm sorry if VM' baloon drivers has no fragmentaion problem.

Thanks,
-Kame


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-12  0:58       ` KAMEZAWA Hiroyuki
  0 siblings, 0 replies; 39+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-01-12  0:58 UTC (permalink / raw)
  To: Andi Kleen
  Cc: H. Peter Anvin, Zheng, Shaohui, linux-mm, akpm, linux-kernel,
	y-goto, Dave Hansen, Wu, Fengguang, x86

On Fri, 08 Jan 2010 20:47:54 +0100
Andi Kleen <ak@linux.intel.com> wrote:

> H. Peter Anvin wrote:
> > On 01/07/2010 07:32 PM, Zheng, Shaohui wrote:
> >> Resend the patch to the mailing-list, the original patch URL is 
> >> http://patchwork.kernel.org/patch/69075/, it is not accepted without comments,
> >> sent it again to review.
> >>
> >> Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel
> >>
> >> The new added memory can not be access by interface /dev/mem, because we do not
> >>  update the variable high_memory. This patch add a new e820 entry in e820 table,
> >>  and update max_pfn, max_low_pfn and high_memory.
> >>
> >> We add a function update_pfn in file arch/x86/mm/init.c to udpate these
> >>  varibles. Memory hotplug does not make sense on 32-bit kernel, so we did not
> >>  concern it in this function.
> >>
> > 
> > Memory hotplug makes sense on 32-bit kernels, at least in virtual
> > environments.
> 
> No VM currently supports it to my knowledge. They all use traditional
> balooning.
> 
> If someone adds that they can still fix it, but right now fixing it on 64bit
> is the important part.
> 
I wonder...with some modification, memory hotplug (or Mel's page coalescing)
can be used for balloning in MAX_ORDER page size.
I'm sorry if VM' baloon drivers has no fragmentaion problem.

Thanks,
-Kame

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-12  0:30         ` KAMEZAWA Hiroyuki
@ 2010-01-12  1:38           ` Andi Kleen
  -1 siblings, 0 replies; 39+ messages in thread
From: Andi Kleen @ 2010-01-12  1:38 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Wu Fengguang, Zheng, Shaohui, linux-mm, akpm, linux-kernel,
	y-goto, Dave Hansen, x86


> Hmmm....could you rewrite /dev/mem to use kernel/resource.c other than
> modifing e820 maps. ?

Sorry but responding to bug fixes with "could you please rewrite ..."  is
not considered fair. Shaohui is just trying to fix a bug here, not redesigning
a subsystem.


> Two reasons.
>   - e820map is considerted to be stable, read-only after boot.
>   - We don't need to add more x86 special codes.

We need working memory hotadd.

-Andi



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-12  1:38           ` Andi Kleen
  0 siblings, 0 replies; 39+ messages in thread
From: Andi Kleen @ 2010-01-12  1:38 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Wu Fengguang, Zheng, Shaohui, linux-mm, akpm, linux-kernel,
	y-goto, Dave Hansen, x86


> Hmmm....could you rewrite /dev/mem to use kernel/resource.c other than
> modifing e820 maps. ?

Sorry but responding to bug fixes with "could you please rewrite ..."  is
not considered fair. Shaohui is just trying to fix a bug here, not redesigning
a subsystem.


> Two reasons.
>   - e820map is considerted to be stable, read-only after boot.
>   - We don't need to add more x86 special codes.

We need working memory hotadd.

-Andi


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-12  1:38           ` Andi Kleen
@ 2010-01-12  1:39             ` KAMEZAWA Hiroyuki
  -1 siblings, 0 replies; 39+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-01-12  1:39 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Wu Fengguang, Zheng, Shaohui, linux-mm, akpm, linux-kernel,
	y-goto, Dave Hansen, x86

On Tue, 12 Jan 2010 02:38:09 +0100
Andi Kleen <ak@linux.intel.com> wrote:

> 
> > Hmmm....could you rewrite /dev/mem to use kernel/resource.c other than
> > modifing e820 maps. ?
> 
> Sorry but responding to bug fixes with "could you please rewrite ..."  is
> not considered fair. Shaohui is just trying to fix a bug here, not redesigning
> a subsystem.
> 
Quick hack for bug fix is okay to me. 

About this patch, I'm not sure whether we are allowed to rewrite e820map after
boot.

Thanks,
-Kame


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-12  1:39             ` KAMEZAWA Hiroyuki
  0 siblings, 0 replies; 39+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-01-12  1:39 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Wu Fengguang, Zheng, Shaohui, linux-mm, akpm, linux-kernel,
	y-goto, Dave Hansen, x86

On Tue, 12 Jan 2010 02:38:09 +0100
Andi Kleen <ak@linux.intel.com> wrote:

> 
> > Hmmm....could you rewrite /dev/mem to use kernel/resource.c other than
> > modifing e820 maps. ?
> 
> Sorry but responding to bug fixes with "could you please rewrite ..."  is
> not considered fair. Shaohui is just trying to fix a bug here, not redesigning
> a subsystem.
> 
Quick hack for bug fix is okay to me. 

About this patch, I'm not sure whether we are allowed to rewrite e820map after
boot.

Thanks,
-Kame

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-12  1:39             ` KAMEZAWA Hiroyuki
@ 2010-01-12  1:50               ` KAMEZAWA Hiroyuki
  -1 siblings, 0 replies; 39+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-01-12  1:50 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Andi Kleen, Wu Fengguang, Zheng, Shaohui, linux-mm, akpm,
	linux-kernel, y-goto, Dave Hansen, x86

On Tue, 12 Jan 2010 10:39:44 +0900
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:

> On Tue, 12 Jan 2010 02:38:09 +0100
> Andi Kleen <ak@linux.intel.com> wrote:
> 
> > 
> > > Hmmm....could you rewrite /dev/mem to use kernel/resource.c other than
> > > modifing e820 maps. ?
> > 
> > Sorry but responding to bug fixes with "could you please rewrite ..."  is
> > not considered fair. Shaohui is just trying to fix a bug here, not redesigning
> > a subsystem.
> > 
> Quick hack for bug fix is okay to me. 
> 
Just an information.

We already check kenerke/resource.c's resource information, here.

read_mem()
	-> range_is_allowed()
		-> devmem_is_allowd()
			-> iomem_is_exclusive()

extra calls of page_is_ram() to ask architecture's map seems redundunt.

But, I know PPC guys doesn't use ioresource.c, hehe.

Thanks,
-Kame


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-12  1:50               ` KAMEZAWA Hiroyuki
  0 siblings, 0 replies; 39+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-01-12  1:50 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Andi Kleen, Wu Fengguang, Zheng, Shaohui, linux-mm, akpm,
	linux-kernel, y-goto, Dave Hansen, x86

On Tue, 12 Jan 2010 10:39:44 +0900
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:

> On Tue, 12 Jan 2010 02:38:09 +0100
> Andi Kleen <ak@linux.intel.com> wrote:
> 
> > 
> > > Hmmm....could you rewrite /dev/mem to use kernel/resource.c other than
> > > modifing e820 maps. ?
> > 
> > Sorry but responding to bug fixes with "could you please rewrite ..."  is
> > not considered fair. Shaohui is just trying to fix a bug here, not redesigning
> > a subsystem.
> > 
> Quick hack for bug fix is okay to me. 
> 
Just an information.

We already check kenerke/resource.c's resource information, here.

read_mem()
	-> range_is_allowed()
		-> devmem_is_allowd()
			-> iomem_is_exclusive()

extra calls of page_is_ram() to ask architecture's map seems redundunt.

But, I know PPC guys doesn't use ioresource.c, hehe.

Thanks,
-Kame

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-12  0:30         ` KAMEZAWA Hiroyuki
@ 2010-01-12  2:33           ` Wu Fengguang
  -1 siblings, 0 replies; 39+ messages in thread
From: Wu Fengguang @ 2010-01-12  2:33 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Zheng, Shaohui, linux-mm, akpm, linux-kernel, ak, y-goto,
	Dave Hansen, x86

On Tue, Jan 12, 2010 at 08:30:31AM +0800, KAMEZAWA Hiroyuki wrote:
> On Mon, 11 Jan 2010 20:43:03 +0800
> Wu Fengguang <fengguang.wu@intel.com> wrote:
> 
> > > > +	/* if add to low memory, update max_low_pfn */
> > > > +	if (unlikely(start_pfn < limit_low_pfn)) {
> > > > +		if (end_pfn <= limit_low_pfn)
> > > > +			max_low_pfn = end_pfn;
> > > > +		else
> > > > +			max_low_pfn = limit_low_pfn;
> > > 
> > > X86_64 actually always set max_low_pfn=max_pfn, in setup_arch():
> > > [Zheng, Shaohui] there should be some misunderstanding, I read the
> > > code carefully, if the total memory is under 4G, it always
> > > max_low_pfn=max_pfn. If the total memory is larger than 4G,
> > > max_low_pfn means the end of low ram. It set
> > 
> > > max_low_pfn = e820_end_of_low_ram_pfn();.
> > 
> > The above line is very misleading.. In setup_arch(), it will be
> > overrode by the following block.
> > 
> 
> Hmmm....could you rewrite /dev/mem to use kernel/resource.c other than
> modifing e820 maps. ?
> Two reasons.
>   - e820map is considerted to be stable, read-only after boot.
>   - We don't need to add more x86 special codes.

Sure, here it is :)
---
x86: use the generic page_is_ram()

The generic resource based page_is_ram() works better with memory
hotplug/hotremove. So switch the x86 e820map based code to it.

CC: Andi Kleen <andi@firstfloor.org> 
CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 arch/x86/include/asm/page_types.h |    1 
 arch/x86/mm/ioremap.c             |   37 ----------------------------
 kernel/resource.c                 |   17 ++++++++++++
 3 files changed, 17 insertions(+), 38 deletions(-)

--- linux-mm.orig/arch/x86/include/asm/page_types.h	2010-01-12 10:31:01.000000000 +0800
+++ linux-mm/arch/x86/include/asm/page_types.h	2010-01-12 10:31:44.000000000 +0800
@@ -34,19 +34,18 @@
 
 #ifdef CONFIG_X86_64
 #include <asm/page_64_types.h>
 #else
 #include <asm/page_32_types.h>
 #endif	/* CONFIG_X86_64 */
 
 #ifndef __ASSEMBLY__
 
-extern int page_is_ram(unsigned long pagenr);
 extern int devmem_is_allowed(unsigned long pagenr);
 
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
 extern unsigned long init_memory_mapping(unsigned long start,
 					 unsigned long end);
 
 extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn,
--- linux-mm.orig/arch/x86/mm/ioremap.c	2010-01-12 10:31:01.000000000 +0800
+++ linux-mm/arch/x86/mm/ioremap.c	2010-01-12 10:31:44.000000000 +0800
@@ -18,55 +18,18 @@
 #include <asm/e820.h>
 #include <asm/fixmap.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
 #include <asm/pat.h>
 
 #include "physaddr.h"
 
-int page_is_ram(unsigned long pagenr)
-{
-	resource_size_t addr, end;
-	int i;
-
-	/*
-	 * A special case is the first 4Kb of memory;
-	 * This is a BIOS owned area, not kernel ram, but generally
-	 * not listed as such in the E820 table.
-	 */
-	if (pagenr == 0)
-		return 0;
-
-	/*
-	 * Second special case: Some BIOSen report the PC BIOS
-	 * area (640->1Mb) as ram even though it is not.
-	 */
-	if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
-		    pagenr < (BIOS_END >> PAGE_SHIFT))
-		return 0;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		/*
-		 * Not usable memory:
-		 */
-		if (e820.map[i].type != E820_RAM)
-			continue;
-		addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
-		end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
-
-
-		if ((pagenr >= addr) && (pagenr < end))
-			return 1;
-	}
-	return 0;
-}
-
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
  */
 int ioremap_change_attr(unsigned long vaddr, unsigned long size,
 			       unsigned long prot_val)
 {
 	unsigned long nrpages = size >> PAGE_SHIFT;
 	int err;
--- linux-mm.orig/kernel/resource.c	2010-01-12 10:31:01.000000000 +0800
+++ linux-mm/kernel/resource.c	2010-01-12 10:31:44.000000000 +0800
@@ -298,18 +298,35 @@ int walk_system_ram_range(unsigned long 
 #endif
 
 static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg)
 {
 	return 24;
 }
 
 int __attribute__((weak)) page_is_ram(unsigned long pfn)
 {
+#ifdef CONFIG_X86
+	/*
+	 * A special case is the first 4Kb of memory;
+	 * This is a BIOS owned area, not kernel ram, but generally
+	 * not listed as such in the E820 table.
+	 */
+	if (pfn == 0)
+		return 0;
+
+	/*
+	 * Second special case: Some BIOSen report the PC BIOS
+	 * area (640->1Mb) as ram even though it is not.
+	 */
+	if (pfn >= (BIOS_BEGIN >> PAGE_SHIFT) &&
+	    pfn <  (BIOS_END   >> PAGE_SHIFT))
+		return 0;
+#endif
 	return 24 == walk_system_ram_range(pfn, 1, NULL, __is_ram);
 }
 
 /*
  * Find empty slot in the resource tree given range and alignment.
  */
 static int find_resource(struct resource *root, struct resource *new,
 			 resource_size_t size, resource_size_t min,
 			 resource_size_t max, resource_size_t align,


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-12  2:33           ` Wu Fengguang
  0 siblings, 0 replies; 39+ messages in thread
From: Wu Fengguang @ 2010-01-12  2:33 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Zheng, Shaohui, linux-mm, akpm, linux-kernel, ak, y-goto,
	Dave Hansen, x86

On Tue, Jan 12, 2010 at 08:30:31AM +0800, KAMEZAWA Hiroyuki wrote:
> On Mon, 11 Jan 2010 20:43:03 +0800
> Wu Fengguang <fengguang.wu@intel.com> wrote:
> 
> > > > +	/* if add to low memory, update max_low_pfn */
> > > > +	if (unlikely(start_pfn < limit_low_pfn)) {
> > > > +		if (end_pfn <= limit_low_pfn)
> > > > +			max_low_pfn = end_pfn;
> > > > +		else
> > > > +			max_low_pfn = limit_low_pfn;
> > > 
> > > X86_64 actually always set max_low_pfn=max_pfn, in setup_arch():
> > > [Zheng, Shaohui] there should be some misunderstanding, I read the
> > > code carefully, if the total memory is under 4G, it always
> > > max_low_pfn=max_pfn. If the total memory is larger than 4G,
> > > max_low_pfn means the end of low ram. It set
> > 
> > > max_low_pfn = e820_end_of_low_ram_pfn();.
> > 
> > The above line is very misleading.. In setup_arch(), it will be
> > overrode by the following block.
> > 
> 
> Hmmm....could you rewrite /dev/mem to use kernel/resource.c other than
> modifing e820 maps. ?
> Two reasons.
>   - e820map is considerted to be stable, read-only after boot.
>   - We don't need to add more x86 special codes.

Sure, here it is :)
---
x86: use the generic page_is_ram()

The generic resource based page_is_ram() works better with memory
hotplug/hotremove. So switch the x86 e820map based code to it.

CC: Andi Kleen <andi@firstfloor.org> 
CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 arch/x86/include/asm/page_types.h |    1 
 arch/x86/mm/ioremap.c             |   37 ----------------------------
 kernel/resource.c                 |   17 ++++++++++++
 3 files changed, 17 insertions(+), 38 deletions(-)

--- linux-mm.orig/arch/x86/include/asm/page_types.h	2010-01-12 10:31:01.000000000 +0800
+++ linux-mm/arch/x86/include/asm/page_types.h	2010-01-12 10:31:44.000000000 +0800
@@ -34,19 +34,18 @@
 
 #ifdef CONFIG_X86_64
 #include <asm/page_64_types.h>
 #else
 #include <asm/page_32_types.h>
 #endif	/* CONFIG_X86_64 */
 
 #ifndef __ASSEMBLY__
 
-extern int page_is_ram(unsigned long pagenr);
 extern int devmem_is_allowed(unsigned long pagenr);
 
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
 extern unsigned long init_memory_mapping(unsigned long start,
 					 unsigned long end);
 
 extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn,
--- linux-mm.orig/arch/x86/mm/ioremap.c	2010-01-12 10:31:01.000000000 +0800
+++ linux-mm/arch/x86/mm/ioremap.c	2010-01-12 10:31:44.000000000 +0800
@@ -18,55 +18,18 @@
 #include <asm/e820.h>
 #include <asm/fixmap.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
 #include <asm/pat.h>
 
 #include "physaddr.h"
 
-int page_is_ram(unsigned long pagenr)
-{
-	resource_size_t addr, end;
-	int i;
-
-	/*
-	 * A special case is the first 4Kb of memory;
-	 * This is a BIOS owned area, not kernel ram, but generally
-	 * not listed as such in the E820 table.
-	 */
-	if (pagenr == 0)
-		return 0;
-
-	/*
-	 * Second special case: Some BIOSen report the PC BIOS
-	 * area (640->1Mb) as ram even though it is not.
-	 */
-	if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
-		    pagenr < (BIOS_END >> PAGE_SHIFT))
-		return 0;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		/*
-		 * Not usable memory:
-		 */
-		if (e820.map[i].type != E820_RAM)
-			continue;
-		addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
-		end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
-
-
-		if ((pagenr >= addr) && (pagenr < end))
-			return 1;
-	}
-	return 0;
-}
-
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
  */
 int ioremap_change_attr(unsigned long vaddr, unsigned long size,
 			       unsigned long prot_val)
 {
 	unsigned long nrpages = size >> PAGE_SHIFT;
 	int err;
--- linux-mm.orig/kernel/resource.c	2010-01-12 10:31:01.000000000 +0800
+++ linux-mm/kernel/resource.c	2010-01-12 10:31:44.000000000 +0800
@@ -298,18 +298,35 @@ int walk_system_ram_range(unsigned long 
 #endif
 
 static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg)
 {
 	return 24;
 }
 
 int __attribute__((weak)) page_is_ram(unsigned long pfn)
 {
+#ifdef CONFIG_X86
+	/*
+	 * A special case is the first 4Kb of memory;
+	 * This is a BIOS owned area, not kernel ram, but generally
+	 * not listed as such in the E820 table.
+	 */
+	if (pfn == 0)
+		return 0;
+
+	/*
+	 * Second special case: Some BIOSen report the PC BIOS
+	 * area (640->1Mb) as ram even though it is not.
+	 */
+	if (pfn >= (BIOS_BEGIN >> PAGE_SHIFT) &&
+	    pfn <  (BIOS_END   >> PAGE_SHIFT))
+		return 0;
+#endif
 	return 24 == walk_system_ram_range(pfn, 1, NULL, __is_ram);
 }
 
 /*
  * Find empty slot in the resource tree given range and alignment.
  */
 static int find_resource(struct resource *root, struct resource *new,
 			 resource_size_t size, resource_size_t min,
 			 resource_size_t max, resource_size_t align,

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-12  2:33           ` Wu Fengguang
@ 2010-01-12  2:39             ` KAMEZAWA Hiroyuki
  -1 siblings, 0 replies; 39+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-01-12  2:39 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Zheng, Shaohui, linux-mm, akpm, linux-kernel, ak, y-goto,
	Dave Hansen, x86

On Tue, 12 Jan 2010 10:33:08 +0800
Wu Fengguang <fengguang.wu@intel.com> wrote:

> Sure, here it is :)
> ---
> x86: use the generic page_is_ram()
> 
> The generic resource based page_is_ram() works better with memory
> hotplug/hotremove. So switch the x86 e820map based code to it.
> 
> CC: Andi Kleen <andi@firstfloor.org> 
> CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 
> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>

Ack.


> +#ifdef CONFIG_X86
> +	/*
> +	 * A special case is the first 4Kb of memory;
> +	 * This is a BIOS owned area, not kernel ram, but generally
> +	 * not listed as such in the E820 table.
> +	 */
> +	if (pfn == 0)
> +		return 0;
> +
> +	/*
> +	 * Second special case: Some BIOSen report the PC BIOS
> +	 * area (640->1Mb) as ram even though it is not.
> +	 */
> +	if (pfn >= (BIOS_BEGIN >> PAGE_SHIFT) &&
> +	    pfn <  (BIOS_END   >> PAGE_SHIFT))
> +		return 0;
> +#endif

I'm glad if this part is sorted out in clean way ;)

Thanks,
-Kame


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-12  2:39             ` KAMEZAWA Hiroyuki
  0 siblings, 0 replies; 39+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-01-12  2:39 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Zheng, Shaohui, linux-mm, akpm, linux-kernel, ak, y-goto,
	Dave Hansen, x86

On Tue, 12 Jan 2010 10:33:08 +0800
Wu Fengguang <fengguang.wu@intel.com> wrote:

> Sure, here it is :)
> ---
> x86: use the generic page_is_ram()
> 
> The generic resource based page_is_ram() works better with memory
> hotplug/hotremove. So switch the x86 e820map based code to it.
> 
> CC: Andi Kleen <andi@firstfloor.org> 
> CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 
> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>

Ack.


> +#ifdef CONFIG_X86
> +	/*
> +	 * A special case is the first 4Kb of memory;
> +	 * This is a BIOS owned area, not kernel ram, but generally
> +	 * not listed as such in the E820 table.
> +	 */
> +	if (pfn == 0)
> +		return 0;
> +
> +	/*
> +	 * Second special case: Some BIOSen report the PC BIOS
> +	 * area (640->1Mb) as ram even though it is not.
> +	 */
> +	if (pfn >= (BIOS_BEGIN >> PAGE_SHIFT) &&
> +	    pfn <  (BIOS_END   >> PAGE_SHIFT))
> +		return 0;
> +#endif

I'm glad if this part is sorted out in clean way ;)

Thanks,
-Kame

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-12  1:50               ` KAMEZAWA Hiroyuki
@ 2010-01-12  2:45                 ` Wu Fengguang
  -1 siblings, 0 replies; 39+ messages in thread
From: Wu Fengguang @ 2010-01-12  2:45 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Andi Kleen, Zheng, Shaohui, linux-mm, akpm, linux-kernel, y-goto,
	Dave Hansen, x86

On Tue, Jan 12, 2010 at 09:50:12AM +0800, KAMEZAWA Hiroyuki wrote:

> Just an information.
> 
> We already check kenerke/resource.c's resource information, here.
> 
> read_mem()
> 	-> range_is_allowed()
> 		-> devmem_is_allowd()
> 			-> iomem_is_exclusive()
> 
> extra calls of page_is_ram() to ask architecture's map seems redundunt.
> 
> But, I know PPC guys doesn't use ioresource.c, hehe.

Another exception is !CONFIG_STRICT_DEVMEM, which makes
range_is_allowed()==1. So we still need the page_is_ram() :)

Thanks,
Fengguang


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-12  2:45                 ` Wu Fengguang
  0 siblings, 0 replies; 39+ messages in thread
From: Wu Fengguang @ 2010-01-12  2:45 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Andi Kleen, Zheng, Shaohui, linux-mm, akpm, linux-kernel, y-goto,
	Dave Hansen, x86

On Tue, Jan 12, 2010 at 09:50:12AM +0800, KAMEZAWA Hiroyuki wrote:

> Just an information.
> 
> We already check kenerke/resource.c's resource information, here.
> 
> read_mem()
> 	-> range_is_allowed()
> 		-> devmem_is_allowd()
> 			-> iomem_is_exclusive()
> 
> extra calls of page_is_ram() to ask architecture's map seems redundunt.
> 
> But, I know PPC guys doesn't use ioresource.c, hehe.

Another exception is !CONFIG_STRICT_DEVMEM, which makes
range_is_allowed()==1. So we still need the page_is_ram() :)

Thanks,
Fengguang

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-12  0:30         ` KAMEZAWA Hiroyuki
@ 2010-01-12  5:45           ` Zheng, Shaohui
  -1 siblings, 0 replies; 39+ messages in thread
From: Zheng, Shaohui @ 2010-01-12  5:45 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki, Wu, Fengguang
  Cc: linux-mm, akpm, linux-kernel, ak, y-goto, Dave Hansen, x86


Hmmm....could you rewrite /dev/mem to use kernel/resource.c other than
modifing e820 maps. ?
Two reasons.
  - e820map is considerted to be stable, read-only after boot.
  - We don't need to add more x86 special codes.
[Zheng, Shaohui] Kame, when I write this patch, I also feel confused whether update e820map. Because of the dependency in function page_is_ram, so we still update it in my patch.
I see that Fengguang already draft patches to change function page_is_ram, the new page_is_ram function use kernel/resource.c instead. That is great that we can still keep a stable e820map. I will resend the patch which update variable high_memory, max_low_pfn and max_pfn only.


Thanks,
-Kame


^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-12  5:45           ` Zheng, Shaohui
  0 siblings, 0 replies; 39+ messages in thread
From: Zheng, Shaohui @ 2010-01-12  5:45 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki, Wu, Fengguang
  Cc: linux-mm, akpm, linux-kernel, ak, y-goto, Dave Hansen, x86


Hmmm....could you rewrite /dev/mem to use kernel/resource.c other than
modifing e820 maps. ?
Two reasons.
  - e820map is considerted to be stable, read-only after boot.
  - We don't need to add more x86 special codes.
[Zheng, Shaohui] Kame, when I write this patch, I also feel confused whether update e820map. Because of the dependency in function page_is_ram, so we still update it in my patch.
I see that Fengguang already draft patches to change function page_is_ram, the new page_is_ram function use kernel/resource.c instead. That is great that we can still keep a stable e820map. I will resend the patch which update variable high_memory, max_low_pfn and max_pfn only.


Thanks,
-Kame

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-11 12:43       ` Wu Fengguang
@ 2010-01-12  5:51         ` Zheng, Shaohui
  -1 siblings, 0 replies; 39+ messages in thread
From: Zheng, Shaohui @ 2010-01-12  5:51 UTC (permalink / raw)
  To: Wu, Fengguang
  Cc: linux-mm, akpm, linux-kernel, ak, y-goto, Dave Hansen, x86,
	KAMEZAWA Hiroyuki

> > +	/* if add to low memory, update max_low_pfn */
> > +	if (unlikely(start_pfn < limit_low_pfn)) {
> > +		if (end_pfn <= limit_low_pfn)
> > +			max_low_pfn = end_pfn;
> > +		else
> > +			max_low_pfn = limit_low_pfn;
> 
> X86_64 actually always set max_low_pfn=max_pfn, in setup_arch():
> [Zheng, Shaohui] there should be some misunderstanding, I read the
> code carefully, if the total memory is under 4G, it always
> max_low_pfn=max_pfn. If the total memory is larger than 4G,
> max_low_pfn means the end of low ram. It set

> max_low_pfn = e820_end_of_low_ram_pfn();.

The above line is very misleading.. In setup_arch(), it will be
overrode by the following block.
[Zheng, Shaohui] yes, I misunderstand it because of this code. It seems that max_low_pfn == max_pfn is always true on x86_32 and x86_64.  Thanks fengguang to point it out.

>  899 #ifdef CONFIG_X86_64
>  900         if (max_pfn > max_low_pfn) {
>  901                 max_pfn_mapped = init_memory_mapping(1UL<<32,
>  902                                                      max_pfn<<PAGE_SHIFT);
>  903                 /* can we preseve max_low_pfn ?*/
>  904                 max_low_pfn = max_pfn;
>  905         }
>  906 #endif
 
Thanks,
Fengguang

^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-12  5:51         ` Zheng, Shaohui
  0 siblings, 0 replies; 39+ messages in thread
From: Zheng, Shaohui @ 2010-01-12  5:51 UTC (permalink / raw)
  To: Wu, Fengguang
  Cc: linux-mm, akpm, linux-kernel, ak, y-goto, Dave Hansen, x86,
	KAMEZAWA Hiroyuki

> > +	/* if add to low memory, update max_low_pfn */
> > +	if (unlikely(start_pfn < limit_low_pfn)) {
> > +		if (end_pfn <= limit_low_pfn)
> > +			max_low_pfn = end_pfn;
> > +		else
> > +			max_low_pfn = limit_low_pfn;
> 
> X86_64 actually always set max_low_pfn=max_pfn, in setup_arch():
> [Zheng, Shaohui] there should be some misunderstanding, I read the
> code carefully, if the total memory is under 4G, it always
> max_low_pfn=max_pfn. If the total memory is larger than 4G,
> max_low_pfn means the end of low ram. It set

> max_low_pfn = e820_end_of_low_ram_pfn();.

The above line is very misleading.. In setup_arch(), it will be
overrode by the following block.
[Zheng, Shaohui] yes, I misunderstand it because of this code. It seems that max_low_pfn == max_pfn is always true on x86_32 and x86_64.  Thanks fengguang to point it out.

>  899 #ifdef CONFIG_X86_64
>  900         if (max_pfn > max_low_pfn) {
>  901                 max_pfn_mapped = init_memory_mapping(1UL<<32,
>  902                                                      max_pfn<<PAGE_SHIFT);
>  903                 /* can we preseve max_low_pfn ?*/
>  904                 max_low_pfn = max_pfn;
>  905         }
>  906 #endif
 
Thanks,
Fengguang

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-12  2:39             ` KAMEZAWA Hiroyuki
@ 2010-01-12 13:35               ` Wu Fengguang
  -1 siblings, 0 replies; 39+ messages in thread
From: Wu Fengguang @ 2010-01-12 13:35 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Zheng, Shaohui, linux-mm, akpm, linux-kernel, ak, y-goto,
	Dave Hansen, x86, Yinghai Lu

On Tue, Jan 12, 2010 at 10:39:03AM +0800, KAMEZAWA Hiroyuki wrote:
> On Tue, 12 Jan 2010 10:33:08 +0800
> Wu Fengguang <fengguang.wu@intel.com> wrote:
> 
> > Sure, here it is :)
> > ---
> > x86: use the generic page_is_ram()
> > 
> > The generic resource based page_is_ram() works better with memory
> > hotplug/hotremove. So switch the x86 e820map based code to it.
> > 
> > CC: Andi Kleen <andi@firstfloor.org> 
> > CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 
> > Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
> 
> Ack.

Thank you.

> 
> > +#ifdef CONFIG_X86
> > +	/*
> > +	 * A special case is the first 4Kb of memory;
> > +	 * This is a BIOS owned area, not kernel ram, but generally
> > +	 * not listed as such in the E820 table.
> > +	 */
> > +	if (pfn == 0)
> > +		return 0;
> > +
> > +	/*
> > +	 * Second special case: Some BIOSen report the PC BIOS
> > +	 * area (640->1Mb) as ram even though it is not.
> > +	 */
> > +	if (pfn >= (BIOS_BEGIN >> PAGE_SHIFT) &&
> > +	    pfn <  (BIOS_END   >> PAGE_SHIFT))
> > +		return 0;
> > +#endif
> 
> I'm glad if this part is sorted out in clean way ;)

Two possible solutions are:

- to exclude the above two ranges directly in e820 map;
- to not add the above two ranges into iomem_resource. 

Yinghai, do you have any suggestions?
We want to get rid of the two explicit tests from page_is_ram().

Thanks,
Fengguang


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-12 13:35               ` Wu Fengguang
  0 siblings, 0 replies; 39+ messages in thread
From: Wu Fengguang @ 2010-01-12 13:35 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Zheng, Shaohui, linux-mm, akpm, linux-kernel, ak, y-goto,
	Dave Hansen, x86, Yinghai Lu

On Tue, Jan 12, 2010 at 10:39:03AM +0800, KAMEZAWA Hiroyuki wrote:
> On Tue, 12 Jan 2010 10:33:08 +0800
> Wu Fengguang <fengguang.wu@intel.com> wrote:
> 
> > Sure, here it is :)
> > ---
> > x86: use the generic page_is_ram()
> > 
> > The generic resource based page_is_ram() works better with memory
> > hotplug/hotremove. So switch the x86 e820map based code to it.
> > 
> > CC: Andi Kleen <andi@firstfloor.org> 
> > CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 
> > Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
> 
> Ack.

Thank you.

> 
> > +#ifdef CONFIG_X86
> > +	/*
> > +	 * A special case is the first 4Kb of memory;
> > +	 * This is a BIOS owned area, not kernel ram, but generally
> > +	 * not listed as such in the E820 table.
> > +	 */
> > +	if (pfn == 0)
> > +		return 0;
> > +
> > +	/*
> > +	 * Second special case: Some BIOSen report the PC BIOS
> > +	 * area (640->1Mb) as ram even though it is not.
> > +	 */
> > +	if (pfn >= (BIOS_BEGIN >> PAGE_SHIFT) &&
> > +	    pfn <  (BIOS_END   >> PAGE_SHIFT))
> > +		return 0;
> > +#endif
> 
> I'm glad if this part is sorted out in clean way ;)

Two possible solutions are:

- to exclude the above two ranges directly in e820 map;
- to not add the above two ranges into iomem_resource. 

Yinghai, do you have any suggestions?
We want to get rid of the two explicit tests from page_is_ram().

Thanks,
Fengguang

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface  /dev/mem for 64-bit kernel(v1)
  2010-01-12 13:35               ` Wu Fengguang
@ 2010-01-12 23:01                 ` Yinghai Lu
  -1 siblings, 0 replies; 39+ messages in thread
From: Yinghai Lu @ 2010-01-12 23:01 UTC (permalink / raw)
  To: Wu Fengguang, Ingo Molnar, H. Peter Anvin, akpm
  Cc: KAMEZAWA Hiroyuki, Zheng, Shaohui, linux-mm, linux-kernel, ak,
	y-goto, Dave Hansen, x86

[-- Attachment #1: Type: text/plain, Size: 1617 bytes --]

On Tue, Jan 12, 2010 at 5:35 AM, Wu Fengguang <fengguang.wu@intel.com> wrote:
> On Tue, Jan 12, 2010 at 10:39:03AM +0800, KAMEZAWA Hiroyuki wrote:
>> On Tue, 12 Jan 2010 10:33:08 +0800
>> Wu Fengguang <fengguang.wu@intel.com> wrote:
>>
>> > Sure, here it is :)
>> > ---
>> > x86: use the generic page_is_ram()
>> >
>> > The generic resource based page_is_ram() works better with memory
>> > hotplug/hotremove. So switch the x86 e820map based code to it.
>> >
>> > CC: Andi Kleen <andi@firstfloor.org>
>> > CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
>> > Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
>>
>> Ack.
>
> Thank you.
>
>>
>> > +#ifdef CONFIG_X86
>> > +   /*
>> > +    * A special case is the first 4Kb of memory;
>> > +    * This is a BIOS owned area, not kernel ram, but generally
>> > +    * not listed as such in the E820 table.
>> > +    */
>> > +   if (pfn == 0)
>> > +           return 0;
>> > +
>> > +   /*
>> > +    * Second special case: Some BIOSen report the PC BIOS
>> > +    * area (640->1Mb) as ram even though it is not.
>> > +    */
>> > +   if (pfn >= (BIOS_BEGIN >> PAGE_SHIFT) &&
>> > +       pfn <  (BIOS_END   >> PAGE_SHIFT))
>> > +           return 0;
>> > +#endif
>>
>> I'm glad if this part is sorted out in clean way ;)
>
> Two possible solutions are:
>
> - to exclude the above two ranges directly in e820 map;
> - to not add the above two ranges into iomem_resource.
>
> Yinghai, do you have any suggestions?
> We want to get rid of the two explicit tests from page_is_ram().

please check attached patch.

YH

[-- Attachment #2: remove_bios_begin_end.patch --]
[-- Type: text/x-diff, Size: 4087 bytes --]

[PATCH] x86: remove bios data range from e820

to prepare move page_is_ram as generic one

Signed-off-by: Yinghai Lu <yinghai@kernel.org.

---
 arch/x86/kernel/e820.c   |    8 ++++++++
 arch/x86/kernel/head32.c |    2 --
 arch/x86/kernel/head64.c |    2 --
 arch/x86/kernel/setup.c  |   19 ++++++++++++++++++-
 arch/x86/mm/ioremap.c    |   16 ----------------
 5 files changed, 26 insertions(+), 21 deletions(-)

Index: linux-2.6/arch/x86/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup.c
+++ linux-2.6/arch/x86/kernel/setup.c
@@ -657,6 +657,23 @@ static struct dmi_system_id __initdata b
 	{}
 };
 
+static void __init trim_bios_range(void)
+{
+	/*
+	 * A special case is the first 4Kb of memory;
+	 * This is a BIOS owned area, not kernel ram, but generally
+	 * not listed as such in the E820 table.
+	 */
+	e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
+	/*
+	 * special case: Some BIOSen report the PC BIOS
+	 * area (640->1Mb) as ram even though it is not.
+	 * take them out.
+	 */
+	e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
+	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+}
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -820,7 +837,7 @@ void __init setup_arch(char **cmdline_p)
 	insert_resource(&iomem_resource, &data_resource);
 	insert_resource(&iomem_resource, &bss_resource);
 
-
+	trim_bios_range();
 #ifdef CONFIG_X86_32
 	if (ppro_with_ram_bug()) {
 		e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -509,11 +509,19 @@ u64 __init e820_remove_range(u64 start,
 			     int checktype)
 {
 	int i;
+	u64 end;
 	u64 real_removed_size = 0;
 
 	if (size > (ULLONG_MAX - start))
 		size = ULLONG_MAX - start;
 
+	end = start + size;
+	printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
+		       (unsigned long long) start,
+		       (unsigned long long) end);
+	e820_print_type(old_type);
+	printk(KERN_CONT "\n");
+
 	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
 		u64 final_start, final_end;
Index: linux-2.6/arch/x86/mm/ioremap.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/ioremap.c
+++ linux-2.6/arch/x86/mm/ioremap.c
@@ -29,22 +29,6 @@ int page_is_ram(unsigned long pagenr)
 	resource_size_t addr, end;
 	int i;
 
-	/*
-	 * A special case is the first 4Kb of memory;
-	 * This is a BIOS owned area, not kernel ram, but generally
-	 * not listed as such in the E820 table.
-	 */
-	if (pagenr == 0)
-		return 0;
-
-	/*
-	 * Second special case: Some BIOSen report the PC BIOS
-	 * area (640->1Mb) as ram even though it is not.
-	 */
-	if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
-		    pagenr < (BIOS_END >> PAGE_SHIFT))
-		return 0;
-
 	for (i = 0; i < e820.nr_map; i++) {
 		/*
 		 * Not usable memory:
Index: linux-2.6/arch/x86/kernel/head32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head32.c
+++ linux-2.6/arch/x86/kernel/head32.c
@@ -29,8 +29,6 @@ static void __init i386_default_early_se
 
 void __init i386_start_kernel(void)
 {
-	reserve_early_overlap_ok(0, PAGE_SIZE, "BIOS data page");
-
 #ifdef CONFIG_X86_TRAMPOLINE
 	/*
 	 * But first pinch a few for the stack/trampoline stuff
Index: linux-2.6/arch/x86/kernel/head64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head64.c
+++ linux-2.6/arch/x86/kernel/head64.c
@@ -98,8 +98,6 @@ void __init x86_64_start_reservations(ch
 {
 	copy_bootdata(__va(real_mode_data));
 
-	reserve_early_overlap_ok(0, PAGE_SIZE, "BIOS data page");
-
 	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
 
 #ifdef CONFIG_BLK_DEV_INITRD

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-12 23:01                 ` Yinghai Lu
  0 siblings, 0 replies; 39+ messages in thread
From: Yinghai Lu @ 2010-01-12 23:01 UTC (permalink / raw)
  To: Wu Fengguang, Ingo Molnar, H. Peter Anvin, akpm
  Cc: KAMEZAWA Hiroyuki, Zheng, Shaohui, linux-mm, linux-kernel, ak,
	y-goto, Dave Hansen, x86

[-- Attachment #1: Type: text/plain, Size: 1617 bytes --]

On Tue, Jan 12, 2010 at 5:35 AM, Wu Fengguang <fengguang.wu@intel.com> wrote:
> On Tue, Jan 12, 2010 at 10:39:03AM +0800, KAMEZAWA Hiroyuki wrote:
>> On Tue, 12 Jan 2010 10:33:08 +0800
>> Wu Fengguang <fengguang.wu@intel.com> wrote:
>>
>> > Sure, here it is :)
>> > ---
>> > x86: use the generic page_is_ram()
>> >
>> > The generic resource based page_is_ram() works better with memory
>> > hotplug/hotremove. So switch the x86 e820map based code to it.
>> >
>> > CC: Andi Kleen <andi@firstfloor.org>
>> > CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
>> > Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
>>
>> Ack.
>
> Thank you.
>
>>
>> > +#ifdef CONFIG_X86
>> > +   /*
>> > +    * A special case is the first 4Kb of memory;
>> > +    * This is a BIOS owned area, not kernel ram, but generally
>> > +    * not listed as such in the E820 table.
>> > +    */
>> > +   if (pfn == 0)
>> > +           return 0;
>> > +
>> > +   /*
>> > +    * Second special case: Some BIOSen report the PC BIOS
>> > +    * area (640->1Mb) as ram even though it is not.
>> > +    */
>> > +   if (pfn >= (BIOS_BEGIN >> PAGE_SHIFT) &&
>> > +       pfn <  (BIOS_END   >> PAGE_SHIFT))
>> > +           return 0;
>> > +#endif
>>
>> I'm glad if this part is sorted out in clean way ;)
>
> Two possible solutions are:
>
> - to exclude the above two ranges directly in e820 map;
> - to not add the above two ranges into iomem_resource.
>
> Yinghai, do you have any suggestions?
> We want to get rid of the two explicit tests from page_is_ram().

please check attached patch.

YH

[-- Attachment #2: remove_bios_begin_end.patch --]
[-- Type: text/x-diff, Size: 4087 bytes --]

[PATCH] x86: remove bios data range from e820

to prepare move page_is_ram as generic one

Signed-off-by: Yinghai Lu <yinghai@kernel.org.

---
 arch/x86/kernel/e820.c   |    8 ++++++++
 arch/x86/kernel/head32.c |    2 --
 arch/x86/kernel/head64.c |    2 --
 arch/x86/kernel/setup.c  |   19 ++++++++++++++++++-
 arch/x86/mm/ioremap.c    |   16 ----------------
 5 files changed, 26 insertions(+), 21 deletions(-)

Index: linux-2.6/arch/x86/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup.c
+++ linux-2.6/arch/x86/kernel/setup.c
@@ -657,6 +657,23 @@ static struct dmi_system_id __initdata b
 	{}
 };
 
+static void __init trim_bios_range(void)
+{
+	/*
+	 * A special case is the first 4Kb of memory;
+	 * This is a BIOS owned area, not kernel ram, but generally
+	 * not listed as such in the E820 table.
+	 */
+	e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
+	/*
+	 * special case: Some BIOSen report the PC BIOS
+	 * area (640->1Mb) as ram even though it is not.
+	 * take them out.
+	 */
+	e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
+	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+}
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -820,7 +837,7 @@ void __init setup_arch(char **cmdline_p)
 	insert_resource(&iomem_resource, &data_resource);
 	insert_resource(&iomem_resource, &bss_resource);
 
-
+	trim_bios_range();
 #ifdef CONFIG_X86_32
 	if (ppro_with_ram_bug()) {
 		e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -509,11 +509,19 @@ u64 __init e820_remove_range(u64 start,
 			     int checktype)
 {
 	int i;
+	u64 end;
 	u64 real_removed_size = 0;
 
 	if (size > (ULLONG_MAX - start))
 		size = ULLONG_MAX - start;
 
+	end = start + size;
+	printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
+		       (unsigned long long) start,
+		       (unsigned long long) end);
+	e820_print_type(old_type);
+	printk(KERN_CONT "\n");
+
 	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
 		u64 final_start, final_end;
Index: linux-2.6/arch/x86/mm/ioremap.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/ioremap.c
+++ linux-2.6/arch/x86/mm/ioremap.c
@@ -29,22 +29,6 @@ int page_is_ram(unsigned long pagenr)
 	resource_size_t addr, end;
 	int i;
 
-	/*
-	 * A special case is the first 4Kb of memory;
-	 * This is a BIOS owned area, not kernel ram, but generally
-	 * not listed as such in the E820 table.
-	 */
-	if (pagenr == 0)
-		return 0;
-
-	/*
-	 * Second special case: Some BIOSen report the PC BIOS
-	 * area (640->1Mb) as ram even though it is not.
-	 */
-	if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
-		    pagenr < (BIOS_END >> PAGE_SHIFT))
-		return 0;
-
 	for (i = 0; i < e820.nr_map; i++) {
 		/*
 		 * Not usable memory:
Index: linux-2.6/arch/x86/kernel/head32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head32.c
+++ linux-2.6/arch/x86/kernel/head32.c
@@ -29,8 +29,6 @@ static void __init i386_default_early_se
 
 void __init i386_start_kernel(void)
 {
-	reserve_early_overlap_ok(0, PAGE_SIZE, "BIOS data page");
-
 #ifdef CONFIG_X86_TRAMPOLINE
 	/*
 	 * But first pinch a few for the stack/trampoline stuff
Index: linux-2.6/arch/x86/kernel/head64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head64.c
+++ linux-2.6/arch/x86/kernel/head64.c
@@ -98,8 +98,6 @@ void __init x86_64_start_reservations(ch
 {
 	copy_bootdata(__va(real_mode_data));
 
-	reserve_early_overlap_ok(0, PAGE_SIZE, "BIOS data page");
-
 	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
 
 #ifdef CONFIG_BLK_DEV_INITRD

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
  2010-01-12 23:01                 ` Yinghai Lu
@ 2010-01-13  2:29                   ` Wu Fengguang
  -1 siblings, 0 replies; 39+ messages in thread
From: Wu Fengguang @ 2010-01-13  2:29 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, H. Peter Anvin, akpm, KAMEZAWA Hiroyuki, Zheng,
	Shaohui, linux-mm, linux-kernel, ak, y-goto, Dave Hansen, x86

On Wed, Jan 13, 2010 at 07:01:47AM +0800, Yinghai Lu wrote:
> On Tue, Jan 12, 2010 at 5:35 AM, Wu Fengguang <fengguang.wu@intel.com> wrote:
> > On Tue, Jan 12, 2010 at 10:39:03AM +0800, KAMEZAWA Hiroyuki wrote:
> >> On Tue, 12 Jan 2010 10:33:08 +0800
> >> Wu Fengguang <fengguang.wu@intel.com> wrote:
> >>
> >> > Sure, here it is :)
> >> > ---
> >> > x86: use the generic page_is_ram()
> >> >
> >> > The generic resource based page_is_ram() works better with memory
> >> > hotplug/hotremove. So switch the x86 e820map based code to it.
> >> >
> >> > CC: Andi Kleen <andi@firstfloor.org>
> >> > CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> >> > Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
> >>
> >> Ack.
> >
> > Thank you.
> >
> >>
> >> > +#ifdef CONFIG_X86
> >> > +   /*
> >> > +    * A special case is the first 4Kb of memory;
> >> > +    * This is a BIOS owned area, not kernel ram, but generally
> >> > +    * not listed as such in the E820 table.
> >> > +    */
> >> > +   if (pfn == 0)
> >> > +           return 0;
> >> > +
> >> > +   /*
> >> > +    * Second special case: Some BIOSen report the PC BIOS
> >> > +    * area (640->1Mb) as ram even though it is not.
> >> > +    */
> >> > +   if (pfn >= (BIOS_BEGIN >> PAGE_SHIFT) &&
> >> > +       pfn <  (BIOS_END   >> PAGE_SHIFT))
> >> > +           return 0;
> >> > +#endif
> >>
> >> I'm glad if this part is sorted out in clean way ;)
> >
> > Two possible solutions are:
> >
> > - to exclude the above two ranges directly in e820 map;
> > - to not add the above two ranges into iomem_resource.
> >
> > Yinghai, do you have any suggestions?
> > We want to get rid of the two explicit tests from page_is_ram().
> 
> please check attached patch.
> 
> YH

Thank you, it works!

Content-Description: remove_bios_begin_end.patch
> [PATCH] x86: remove bios data range from e820
> 
> to prepare move page_is_ram as generic one
> 
> Signed-off-by: Yinghai Lu <yinghai@kernel.org.

Malformed email address..

> ---
>  arch/x86/kernel/e820.c   |    8 ++++++++
>  arch/x86/kernel/head32.c |    2 --
>  arch/x86/kernel/head64.c |    2 --
>  arch/x86/kernel/setup.c  |   19 ++++++++++++++++++-
>  arch/x86/mm/ioremap.c    |   16 ----------------
>  5 files changed, 26 insertions(+), 21 deletions(-)
> 
> Index: linux-2.6/arch/x86/kernel/setup.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/setup.c
> +++ linux-2.6/arch/x86/kernel/setup.c
> @@ -657,6 +657,23 @@ static struct dmi_system_id __initdata b
>  	{}
>  };
>  
> +static void __init trim_bios_range(void)

How about e820_trim_bios_range() ?

> +{
> +	/*
> +	 * A special case is the first 4Kb of memory;
> +	 * This is a BIOS owned area, not kernel ram, but generally
> +	 * not listed as such in the E820 table.
> +	 */
> +	e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
> +	/*
> +	 * special case: Some BIOSen report the PC BIOS
> +	 * area (640->1Mb) as ram even though it is not.
> +	 * take them out.
> +	 */
> +	e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
> +	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
> +}
> +


> Index: linux-2.6/arch/x86/kernel/head32.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/head32.c
> +++ linux-2.6/arch/x86/kernel/head32.c
> @@ -29,8 +29,6 @@ static void __init i386_default_early_se
>  
>  void __init i386_start_kernel(void)
>  {
> -	reserve_early_overlap_ok(0, PAGE_SIZE, "BIOS data page");
> -
>  #ifdef CONFIG_X86_TRAMPOLINE
>  	/*
>  	 * But first pinch a few for the stack/trampoline stuff
> Index: linux-2.6/arch/x86/kernel/head64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/head64.c
> +++ linux-2.6/arch/x86/kernel/head64.c
> @@ -98,8 +98,6 @@ void __init x86_64_start_reservations(ch
>  {
>  	copy_bootdata(__va(real_mode_data));
>  
> -	reserve_early_overlap_ok(0, PAGE_SIZE, "BIOS data page");
> -
>  	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
>  
>  #ifdef CONFIG_BLK_DEV_INITRD

The above two trunks don't apply in latest linux-next.
Not a big problem for my test though.

Thanks,
Fengguang

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
@ 2010-01-13  2:29                   ` Wu Fengguang
  0 siblings, 0 replies; 39+ messages in thread
From: Wu Fengguang @ 2010-01-13  2:29 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, H. Peter Anvin, akpm, KAMEZAWA Hiroyuki, Zheng,
	Shaohui, linux-mm, linux-kernel, ak, y-goto, Dave Hansen, x86

On Wed, Jan 13, 2010 at 07:01:47AM +0800, Yinghai Lu wrote:
> On Tue, Jan 12, 2010 at 5:35 AM, Wu Fengguang <fengguang.wu@intel.com> wrote:
> > On Tue, Jan 12, 2010 at 10:39:03AM +0800, KAMEZAWA Hiroyuki wrote:
> >> On Tue, 12 Jan 2010 10:33:08 +0800
> >> Wu Fengguang <fengguang.wu@intel.com> wrote:
> >>
> >> > Sure, here it is :)
> >> > ---
> >> > x86: use the generic page_is_ram()
> >> >
> >> > The generic resource based page_is_ram() works better with memory
> >> > hotplug/hotremove. So switch the x86 e820map based code to it.
> >> >
> >> > CC: Andi Kleen <andi@firstfloor.org>
> >> > CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> >> > Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
> >>
> >> Ack.
> >
> > Thank you.
> >
> >>
> >> > +#ifdef CONFIG_X86
> >> > + A  /*
> >> > + A  A * A special case is the first 4Kb of memory;
> >> > + A  A * This is a BIOS owned area, not kernel ram, but generally
> >> > + A  A * not listed as such in the E820 table.
> >> > + A  A */
> >> > + A  if (pfn == 0)
> >> > + A  A  A  A  A  return 0;
> >> > +
> >> > + A  /*
> >> > + A  A * Second special case: Some BIOSen report the PC BIOS
> >> > + A  A * area (640->1Mb) as ram even though it is not.
> >> > + A  A */
> >> > + A  if (pfn >= (BIOS_BEGIN >> PAGE_SHIFT) &&
> >> > + A  A  A  pfn < A (BIOS_END A  >> PAGE_SHIFT))
> >> > + A  A  A  A  A  return 0;
> >> > +#endif
> >>
> >> I'm glad if this part is sorted out in clean way ;)
> >
> > Two possible solutions are:
> >
> > - to exclude the above two ranges directly in e820 map;
> > - to not add the above two ranges into iomem_resource.
> >
> > Yinghai, do you have any suggestions?
> > We want to get rid of the two explicit tests from page_is_ram().
> 
> please check attached patch.
> 
> YH

Thank you, it works!

Content-Description: remove_bios_begin_end.patch
> [PATCH] x86: remove bios data range from e820
> 
> to prepare move page_is_ram as generic one
> 
> Signed-off-by: Yinghai Lu <yinghai@kernel.org.

Malformed email address..

> ---
>  arch/x86/kernel/e820.c   |    8 ++++++++
>  arch/x86/kernel/head32.c |    2 --
>  arch/x86/kernel/head64.c |    2 --
>  arch/x86/kernel/setup.c  |   19 ++++++++++++++++++-
>  arch/x86/mm/ioremap.c    |   16 ----------------
>  5 files changed, 26 insertions(+), 21 deletions(-)
> 
> Index: linux-2.6/arch/x86/kernel/setup.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/setup.c
> +++ linux-2.6/arch/x86/kernel/setup.c
> @@ -657,6 +657,23 @@ static struct dmi_system_id __initdata b
>  	{}
>  };
>  
> +static void __init trim_bios_range(void)

How about e820_trim_bios_range() ?

> +{
> +	/*
> +	 * A special case is the first 4Kb of memory;
> +	 * This is a BIOS owned area, not kernel ram, but generally
> +	 * not listed as such in the E820 table.
> +	 */
> +	e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
> +	/*
> +	 * special case: Some BIOSen report the PC BIOS
> +	 * area (640->1Mb) as ram even though it is not.
> +	 * take them out.
> +	 */
> +	e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
> +	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
> +}
> +


> Index: linux-2.6/arch/x86/kernel/head32.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/head32.c
> +++ linux-2.6/arch/x86/kernel/head32.c
> @@ -29,8 +29,6 @@ static void __init i386_default_early_se
>  
>  void __init i386_start_kernel(void)
>  {
> -	reserve_early_overlap_ok(0, PAGE_SIZE, "BIOS data page");
> -
>  #ifdef CONFIG_X86_TRAMPOLINE
>  	/*
>  	 * But first pinch a few for the stack/trampoline stuff
> Index: linux-2.6/arch/x86/kernel/head64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/head64.c
> +++ linux-2.6/arch/x86/kernel/head64.c
> @@ -98,8 +98,6 @@ void __init x86_64_start_reservations(ch
>  {
>  	copy_bootdata(__va(real_mode_data));
>  
> -	reserve_early_overlap_ok(0, PAGE_SIZE, "BIOS data page");
> -
>  	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
>  
>  #ifdef CONFIG_BLK_DEV_INITRD

The above two trunks don't apply in latest linux-next.
Not a big problem for my test though.

Thanks,
Fengguang

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 39+ messages in thread

end of thread, other threads:[~2010-01-13  2:29 UTC | newest]

Thread overview: 39+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-01-08  3:32 [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1) Zheng, Shaohui
2010-01-08  5:02 ` H. Peter Anvin
2010-01-08  5:02   ` H. Peter Anvin
2010-01-08  5:18   ` Zheng, Shaohui
2010-01-08  5:18     ` Zheng, Shaohui
2010-01-08 19:47   ` Andi Kleen
2010-01-08 19:47     ` Andi Kleen
2010-01-12  0:58     ` KAMEZAWA Hiroyuki
2010-01-12  0:58       ` KAMEZAWA Hiroyuki
2010-01-08 12:48 ` Wu Fengguang
2010-01-08 12:48   ` Wu Fengguang
2010-01-11  2:20   ` Zheng, Shaohui
2010-01-11  2:20     ` Zheng, Shaohui
2010-01-11 12:43     ` Wu Fengguang
2010-01-11 12:43       ` Wu Fengguang
2010-01-12  0:30       ` KAMEZAWA Hiroyuki
2010-01-12  0:30         ` KAMEZAWA Hiroyuki
2010-01-12  1:38         ` Andi Kleen
2010-01-12  1:38           ` Andi Kleen
2010-01-12  1:39           ` KAMEZAWA Hiroyuki
2010-01-12  1:39             ` KAMEZAWA Hiroyuki
2010-01-12  1:50             ` KAMEZAWA Hiroyuki
2010-01-12  1:50               ` KAMEZAWA Hiroyuki
2010-01-12  2:45               ` Wu Fengguang
2010-01-12  2:45                 ` Wu Fengguang
2010-01-12  2:33         ` Wu Fengguang
2010-01-12  2:33           ` Wu Fengguang
2010-01-12  2:39           ` KAMEZAWA Hiroyuki
2010-01-12  2:39             ` KAMEZAWA Hiroyuki
2010-01-12 13:35             ` Wu Fengguang
2010-01-12 13:35               ` Wu Fengguang
2010-01-12 23:01               ` Yinghai Lu
2010-01-12 23:01                 ` Yinghai Lu
2010-01-13  2:29                 ` Wu Fengguang
2010-01-13  2:29                   ` Wu Fengguang
2010-01-12  5:45         ` Zheng, Shaohui
2010-01-12  5:45           ` Zheng, Shaohui
2010-01-12  5:51       ` Zheng, Shaohui
2010-01-12  5:51         ` Zheng, Shaohui

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.