All of lore.kernel.org
 help / color / mirror / Atom feed
* [BUGFIX][PATCH 1/2] x86, mem: separate x86_64 vmalloc_sync_all() into separate functions
@ 2010-08-20  9:45 ` Haicheng Li
  0 siblings, 0 replies; 14+ messages in thread
From: Haicheng Li @ 2010-08-20  9:45 UTC (permalink / raw)
  To: H. Peter Anvin, Ingo Molnar, Thomas Gleixner, akpm
  Cc: ak, Wu Fengguang, linux-kernel, linux-mm

hello,

I am resending these two patches, which fix the following bug:

The bug is that when memory is hot-added over an area large enough that a new PGD entry is
needed for the direct mapping, the PGDs of other processes do not get updated. This leads to some
CPUs oopsing when they access the unmapped areas, e.g. when onlining CPUs on the newly added node.

Thanks!

---
 From 04d9fc860db40f15ad629f8b341ff84b83a00a8d Mon Sep 17 00:00:00 2001
From: Haicheng Li <haicheng.li@linux.intel.com>
Date: Wed, 19 May 2010 17:42:14 +0800
Subject: [PATCH] x86, mem-hotplug: separate x86_64 vmalloc_sync_all() into separate functions

No behavior change here.

Move some of vmalloc_sync_all() code into a new function
sync_global_pgds() that will be useful for memory hotplug.

Signed-off-by: Haicheng Li <haicheng.li@linux.intel.com>
Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
CC: Andi Kleen <ak@linux.intel.com>
---
  arch/x86/include/asm/pgtable_64.h |    2 ++
  arch/x86/mm/fault.c               |   24 +-----------------------
  arch/x86/mm/init_64.c             |   30 ++++++++++++++++++++++++++++++
  3 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 181be52..317026d 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -102,6 +102,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
  	native_set_pgd(pgd, native_make_pgd(0));
  }

+extern void sync_global_pgds(unsigned long start, unsigned long end);
+
  /*
   * Conversion functions: convert a page and protection to a page entry,
   * and a page entry and page directory to the page they refer to.
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f627779..7ae0897 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -326,29 +326,7 @@ out:

  void vmalloc_sync_all(void)
  {
-	unsigned long address;
-
-	for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
-	     address += PGDIR_SIZE) {
-
-		const pgd_t *pgd_ref = pgd_offset_k(address);
-		unsigned long flags;
-		struct page *page;
-
-		if (pgd_none(*pgd_ref))
-			continue;
-
-		spin_lock_irqsave(&pgd_lock, flags);
-		list_for_each_entry(page, &pgd_list, lru) {
-			pgd_t *pgd;
-			pgd = (pgd_t *)page_address(page) + pgd_index(address);
-			if (pgd_none(*pgd))
-				set_pgd(pgd, *pgd_ref);
-			else
-				BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-		}
-		spin_unlock_irqrestore(&pgd_lock, flags);
-	}
+	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
  }

  /*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ee41bba..b0c3df0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -98,6 +98,36 @@ static int __init nonx32_setup(char *str)
  __setup("noexec32=", nonx32_setup);

  /*
+ * When memory was added/removed make sure all the processes MM have
+ * suitable PGD entries in the local PGD level page.
+ */
+void sync_global_pgds(unsigned long start, unsigned long end)
+{
+	unsigned long address;
+
+	for (address = start; address <= end; address += PGDIR_SIZE) {
+		const pgd_t *pgd_ref = pgd_offset_k(address);
+		unsigned long flags;
+		struct page *page;
+
+		if (pgd_none(*pgd_ref))
+			continue;
+
+		spin_lock_irqsave(&pgd_lock, flags);
+		list_for_each_entry(page, &pgd_list, lru) {
+			pgd_t *pgd;
+			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			if (pgd_none(*pgd))
+				set_pgd(pgd, *pgd_ref);
+			else
+				BUG_ON(pgd_page_vaddr(*pgd)
+					 != pgd_page_vaddr(*pgd_ref));
+		}
+		spin_unlock_irqrestore(&pgd_lock, flags);
+	}
+}
+
+/*
   * NOTE: This function is marked __ref because it calls __init function
   * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
   */
-- 
1.5.6.1



^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [BUGFIX][PATCH 1/2] x86, mem: separate x86_64 vmalloc_sync_all() into separate functions
@ 2010-08-20  9:45 ` Haicheng Li
  0 siblings, 0 replies; 14+ messages in thread
From: Haicheng Li @ 2010-08-20  9:45 UTC (permalink / raw)
  To: H. Peter Anvin, Ingo Molnar, Thomas Gleixner, akpm
  Cc: ak, Wu Fengguang, linux-kernel, linux-mm

hello,

I am resending these two patches, which fix the following bug:

The bug is that when memory is hot-added over an area large enough that a new PGD entry is
needed for the direct mapping, the PGDs of other processes do not get updated. This leads to some
CPUs oopsing when they access the unmapped areas, e.g. when onlining CPUs on the newly added node.

Thanks!

---
 From 04d9fc860db40f15ad629f8b341ff84b83a00a8d Mon Sep 17 00:00:00 2001
From: Haicheng Li <haicheng.li@linux.intel.com>
Date: Wed, 19 May 2010 17:42:14 +0800
Subject: [PATCH] x86, mem-hotplug: separate x86_64 vmalloc_sync_all() into separate functions

No behavior change here.

Move some of vmalloc_sync_all() code into a new function
sync_global_pgds() that will be useful for memory hotplug.

Signed-off-by: Haicheng Li <haicheng.li@linux.intel.com>
Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
CC: Andi Kleen <ak@linux.intel.com>
---
  arch/x86/include/asm/pgtable_64.h |    2 ++
  arch/x86/mm/fault.c               |   24 +-----------------------
  arch/x86/mm/init_64.c             |   30 ++++++++++++++++++++++++++++++
  3 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 181be52..317026d 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -102,6 +102,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
  	native_set_pgd(pgd, native_make_pgd(0));
  }

+extern void sync_global_pgds(unsigned long start, unsigned long end);
+
  /*
   * Conversion functions: convert a page and protection to a page entry,
   * and a page entry and page directory to the page they refer to.
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f627779..7ae0897 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -326,29 +326,7 @@ out:

  void vmalloc_sync_all(void)
  {
-	unsigned long address;
-
-	for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
-	     address += PGDIR_SIZE) {
-
-		const pgd_t *pgd_ref = pgd_offset_k(address);
-		unsigned long flags;
-		struct page *page;
-
-		if (pgd_none(*pgd_ref))
-			continue;
-
-		spin_lock_irqsave(&pgd_lock, flags);
-		list_for_each_entry(page, &pgd_list, lru) {
-			pgd_t *pgd;
-			pgd = (pgd_t *)page_address(page) + pgd_index(address);
-			if (pgd_none(*pgd))
-				set_pgd(pgd, *pgd_ref);
-			else
-				BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-		}
-		spin_unlock_irqrestore(&pgd_lock, flags);
-	}
+	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
  }

  /*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ee41bba..b0c3df0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -98,6 +98,36 @@ static int __init nonx32_setup(char *str)
  __setup("noexec32=", nonx32_setup);

  /*
+ * When memory was added/removed make sure all the processes MM have
+ * suitable PGD entries in the local PGD level page.
+ */
+void sync_global_pgds(unsigned long start, unsigned long end)
+{
+	unsigned long address;
+
+	for (address = start; address <= end; address += PGDIR_SIZE) {
+		const pgd_t *pgd_ref = pgd_offset_k(address);
+		unsigned long flags;
+		struct page *page;
+
+		if (pgd_none(*pgd_ref))
+			continue;
+
+		spin_lock_irqsave(&pgd_lock, flags);
+		list_for_each_entry(page, &pgd_list, lru) {
+			pgd_t *pgd;
+			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			if (pgd_none(*pgd))
+				set_pgd(pgd, *pgd_ref);
+			else
+				BUG_ON(pgd_page_vaddr(*pgd)
+					 != pgd_page_vaddr(*pgd_ref));
+		}
+		spin_unlock_irqrestore(&pgd_lock, flags);
+	}
+}
+
+/*
   * NOTE: This function is marked __ref because it calls __init function
   * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
   */
-- 
1.5.6.1


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [BUGFIX][PATCH 2/2] x86, mem: update all PGDs for direct mapping and vmemmap mapping changes on 64bit.
  2010-08-20  9:45 ` Haicheng Li
@ 2010-08-20  9:50   ` Haicheng Li
  -1 siblings, 0 replies; 14+ messages in thread
From: Haicheng Li @ 2010-08-20  9:50 UTC (permalink / raw)
  To: Haicheng Li, H. Peter Anvin, Ingo Molnar, Thomas Gleixner, akpm
  Cc: ak, Wu Fengguang, linux-kernel, linux-mm

x86, mem-hotplug: update all PGDs for direct mapping and vmemmap mapping changes on 64bit.

When memory hotplug-adding happens for a large enough area
that a new PGD entry is needed for the direct mapping, the PGDs
of other processes would not get updated. This leads to some CPUs
oopsing like below when they have to access the unmapped areas.

[ 1139.243192] BUG: soft lockup - CPU#0 stuck for 61s! [bash:6534]
[ 1139.243195] Modules linked in: ipv6 autofs4 rfcomm l2cap crc16 bluetooth rfkill binfmt_misc
dm_mirror dm_region_hash dm_log dm_multipath dm_mod video output sbs sbshc fan battery ac parport_pc
lp parport joydev usbhid processor thermal thermal_sys container button rtc_cmos rtc_core rtc_lib
i2c_i801 i2c_core pcspkr uhci_hcd ohci_hcd ehci_hcd usbcore
[ 1139.243229] irq event stamp: 8538759
[ 1139.243230] hardirqs last  enabled at (8538759): [<ffffffff8100c3fc>] restore_args+0x0/0x30
[ 1139.243236] hardirqs last disabled at (8538757): [<ffffffff810422df>] __do_softirq+0x106/0x146
[ 1139.243240] softirqs last  enabled at (8538758): [<ffffffff81042310>] __do_softirq+0x137/0x146
[ 1139.243245] softirqs last disabled at (8538743): [<ffffffff8100cb5c>] call_softirq+0x1c/0x34
[ 1139.243249] CPU 0:
[ 1139.243250] Modules linked in: ipv6 autofs4 rfcomm l2cap crc16 bluetooth rfkill binfmt_misc
dm_mirror dm_region_hash dm_log dm_multipath dm_mod video output sbs sbshc fan battery ac parport_pc
lp parport joydev usbhid processor thermal thermal_sys container button rtc_cmos rtc_core rtc_lib
i2c_i801 i2c_core pcspkr uhci_hcd ohci_hcd ehci_hcd usbcore
[ 1139.243284] Pid: 6534, comm: bash Tainted: G   M       2.6.32-haicheng-cpuhp #7 QSSC-S4R
[ 1139.243287] RIP: 0010:[<ffffffff810ace35>]  [<ffffffff810ace35>] alloc_arraycache+0x35/0x69
[ 1139.243292] RSP: 0018:ffff8802799f9d78  EFLAGS: 00010286
[ 1139.243295] RAX: ffff8884ffc00000 RBX: ffff8802799f9d98 RCX: 0000000000000000
[ 1139.243297] RDX: 0000000000190018 RSI: 0000000000000001 RDI: ffff8884ffc00010
[ 1139.243300] RBP: ffffffff8100c34e R08: 0000000000000002 R09: 0000000000000000
[ 1139.243303] R10: ffffffff8246dda0 R11: 000000d08246dda0 R12: ffff8802599bfff0
[ 1139.243305] R13: ffff88027904c040 R14: ffff8802799f8000 R15: 0000000000000001
[ 1139.243308] FS:  00007fe81bfe86e0(0000) GS:ffff88000d800000(0000) knlGS:0000000000000000
[ 1139.243311] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1139.243313] CR2: ffff8884ffc00000 CR3: 000000026cf2d000 CR4: 00000000000006f0
[ 1139.243316] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1139.243318] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 1139.243321] Call Trace:
[ 1139.243324]  [<ffffffff810ace29>] ? alloc_arraycache+0x29/0x69
[ 1139.243328]  [<ffffffff8135004e>] ? cpuup_callback+0x1b0/0x32a
[ 1139.243333]  [<ffffffff8105385d>] ? notifier_call_chain+0x33/0x5b
[ 1139.243337]  [<ffffffff810538a4>] ? __raw_notifier_call_chain+0x9/0xb
[ 1139.243340]  [<ffffffff8134ecfc>] ? cpu_up+0xb3/0x152
[ 1139.243344]  [<ffffffff813388ce>] ? store_online+0x4d/0x75
[ 1139.243348]  [<ffffffff811e53f3>] ? sysdev_store+0x1b/0x1d
[ 1139.243351]  [<ffffffff8110589f>] ? sysfs_write_file+0xe5/0x121
[ 1139.243355]  [<ffffffff810b539d>] ? vfs_write+0xae/0x14a
[ 1139.243358]  [<ffffffff810b587f>] ? sys_write+0x47/0x6f
[ 1139.243362]  [<ffffffff8100b9ab>] ? system_call_fastpath+0x16/0x1b

This patch makes sure to always replicate new direct mapping PGD entries
to the PGDs of all processes, as well as ensures corresponding vmemmap
mapping gets synced.

V1: initial code by Andi Kleen.
V2: fix several issues found in testing.
V3: as suggested by Wu Fengguang, reuse common code of vmalloc_sync_all().

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Haicheng Li <haicheng.li@linux.intel.com>
Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
---
  arch/x86/mm/init_64.c |    8 +++++++-
  1 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b0c3df0..fa72b4b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -564,11 +564,13 @@ kernel_physical_mapping_init(unsigned long start,
  			     unsigned long end,
  			     unsigned long page_size_mask)
  {
-
+	int pgd_changed = 0;
  	unsigned long next, last_map_addr = end;
+	unsigned long addr;

  	start = (unsigned long)__va(start);
  	end = (unsigned long)__va(end);
+	addr = start;

  	for (; start < end; start = next) {
  		pgd_t *pgd = pgd_offset_k(start);
@@ -593,7 +595,10 @@ kernel_physical_mapping_init(unsigned long start,
  		spin_lock(&init_mm.page_table_lock);
  		pgd_populate(&init_mm, pgd, __va(pud_phys));
  		spin_unlock(&init_mm.page_table_lock);
+		pgd_changed = 1;
  	}
+	if (pgd_changed)
+		sync_global_pgds(addr, end);
  	__flush_tlb_all();

  	return last_map_addr;
@@ -1033,6 +1038,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
  		}

  	}
+	sync_global_pgds((unsigned long)start_page, end);
  	return 0;
  }

-- 
1.5.6.1

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [BUGFIX][PATCH 2/2] x86, mem: update all PGDs for direct mapping and vmemmap mapping changes on 64bit.
@ 2010-08-20  9:50   ` Haicheng Li
  0 siblings, 0 replies; 14+ messages in thread
From: Haicheng Li @ 2010-08-20  9:50 UTC (permalink / raw)
  To: Haicheng Li, H. Peter Anvin, Ingo Molnar, Thomas Gleixner, akpm
  Cc: ak, Wu Fengguang, linux-kernel, linux-mm

x86, mem-hotplug: update all PGDs for direct mapping and vmemmap mapping changes on 64bit.

When memory hotplug-adding happens for a large enough area
that a new PGD entry is needed for the direct mapping, the PGDs
of other processes would not get updated. This leads to some CPUs
oopsing like below when they have to access the unmapped areas.

[ 1139.243192] BUG: soft lockup - CPU#0 stuck for 61s! [bash:6534]
[ 1139.243195] Modules linked in: ipv6 autofs4 rfcomm l2cap crc16 bluetooth rfkill binfmt_misc
dm_mirror dm_region_hash dm_log dm_multipath dm_mod video output sbs sbshc fan battery ac parport_pc
lp parport joydev usbhid processor thermal thermal_sys container button rtc_cmos rtc_core rtc_lib
i2c_i801 i2c_core pcspkr uhci_hcd ohci_hcd ehci_hcd usbcore
[ 1139.243229] irq event stamp: 8538759
[ 1139.243230] hardirqs last  enabled at (8538759): [<ffffffff8100c3fc>] restore_args+0x0/0x30
[ 1139.243236] hardirqs last disabled at (8538757): [<ffffffff810422df>] __do_softirq+0x106/0x146
[ 1139.243240] softirqs last  enabled at (8538758): [<ffffffff81042310>] __do_softirq+0x137/0x146
[ 1139.243245] softirqs last disabled at (8538743): [<ffffffff8100cb5c>] call_softirq+0x1c/0x34
[ 1139.243249] CPU 0:
[ 1139.243250] Modules linked in: ipv6 autofs4 rfcomm l2cap crc16 bluetooth rfkill binfmt_misc
dm_mirror dm_region_hash dm_log dm_multipath dm_mod video output sbs sbshc fan battery ac parport_pc
lp parport joydev usbhid processor thermal thermal_sys container button rtc_cmos rtc_core rtc_lib
i2c_i801 i2c_core pcspkr uhci_hcd ohci_hcd ehci_hcd usbcore
[ 1139.243284] Pid: 6534, comm: bash Tainted: G   M       2.6.32-haicheng-cpuhp #7 QSSC-S4R
[ 1139.243287] RIP: 0010:[<ffffffff810ace35>]  [<ffffffff810ace35>] alloc_arraycache+0x35/0x69
[ 1139.243292] RSP: 0018:ffff8802799f9d78  EFLAGS: 00010286
[ 1139.243295] RAX: ffff8884ffc00000 RBX: ffff8802799f9d98 RCX: 0000000000000000
[ 1139.243297] RDX: 0000000000190018 RSI: 0000000000000001 RDI: ffff8884ffc00010
[ 1139.243300] RBP: ffffffff8100c34e R08: 0000000000000002 R09: 0000000000000000
[ 1139.243303] R10: ffffffff8246dda0 R11: 000000d08246dda0 R12: ffff8802599bfff0
[ 1139.243305] R13: ffff88027904c040 R14: ffff8802799f8000 R15: 0000000000000001
[ 1139.243308] FS:  00007fe81bfe86e0(0000) GS:ffff88000d800000(0000) knlGS:0000000000000000
[ 1139.243311] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1139.243313] CR2: ffff8884ffc00000 CR3: 000000026cf2d000 CR4: 00000000000006f0
[ 1139.243316] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1139.243318] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 1139.243321] Call Trace:
[ 1139.243324]  [<ffffffff810ace29>] ? alloc_arraycache+0x29/0x69
[ 1139.243328]  [<ffffffff8135004e>] ? cpuup_callback+0x1b0/0x32a
[ 1139.243333]  [<ffffffff8105385d>] ? notifier_call_chain+0x33/0x5b
[ 1139.243337]  [<ffffffff810538a4>] ? __raw_notifier_call_chain+0x9/0xb
[ 1139.243340]  [<ffffffff8134ecfc>] ? cpu_up+0xb3/0x152
[ 1139.243344]  [<ffffffff813388ce>] ? store_online+0x4d/0x75
[ 1139.243348]  [<ffffffff811e53f3>] ? sysdev_store+0x1b/0x1d
[ 1139.243351]  [<ffffffff8110589f>] ? sysfs_write_file+0xe5/0x121
[ 1139.243355]  [<ffffffff810b539d>] ? vfs_write+0xae/0x14a
[ 1139.243358]  [<ffffffff810b587f>] ? sys_write+0x47/0x6f
[ 1139.243362]  [<ffffffff8100b9ab>] ? system_call_fastpath+0x16/0x1b

This patch makes sure to always replicate new direct mapping PGD entries
to the PGDs of all processes, as well as ensures corresponding vmemmap
mapping gets synced.

V1: initial code by Andi Kleen.
V2: fix several issues found in testing.
V3: as suggested by Wu Fengguang, reuse common code of vmalloc_sync_all().

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Haicheng Li <haicheng.li@linux.intel.com>
Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
---
  arch/x86/mm/init_64.c |    8 +++++++-
  1 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b0c3df0..fa72b4b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -564,11 +564,13 @@ kernel_physical_mapping_init(unsigned long start,
  			     unsigned long end,
  			     unsigned long page_size_mask)
  {
-
+	int pgd_changed = 0;
  	unsigned long next, last_map_addr = end;
+	unsigned long addr;

  	start = (unsigned long)__va(start);
  	end = (unsigned long)__va(end);
+	addr = start;

  	for (; start < end; start = next) {
  		pgd_t *pgd = pgd_offset_k(start);
@@ -593,7 +595,10 @@ kernel_physical_mapping_init(unsigned long start,
  		spin_lock(&init_mm.page_table_lock);
  		pgd_populate(&init_mm, pgd, __va(pud_phys));
  		spin_unlock(&init_mm.page_table_lock);
+		pgd_changed = 1;
  	}
+	if (pgd_changed)
+		sync_global_pgds(addr, end);
  	__flush_tlb_all();

  	return last_map_addr;
@@ -1033,6 +1038,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
  		}

  	}
+	sync_global_pgds((unsigned long)start_page, end);
  	return 0;
  }

-- 
1.5.6.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [BUGFIX][PATCH 1/2] x86, mem: separate x86_64 vmalloc_sync_all() into separate functions
  2010-08-20  9:45 ` Haicheng Li
@ 2010-08-25  7:45   ` Andi Kleen
  -1 siblings, 0 replies; 14+ messages in thread
From: Andi Kleen @ 2010-08-25  7:45 UTC (permalink / raw)
  To: Haicheng Li
  Cc: H. Peter Anvin, Ingo Molnar, Thomas Gleixner, akpm, ak,
	Wu Fengguang, linux-kernel, linux-mm

Haicheng Li <haicheng.li@linux.intel.com> writes:

> hello,
>
> Resend these two patches for bug fixing:
>
> The bug is that when memory hotplug-adding happens for a large enough area that a new PGD entry is
> needed for the direct mapping, the PGDs of other processes would not get updated. This leads to some
> CPUs oopsing when they have to access the unmapped areas, e.g. onlining CPUs on the new added node.

The patches look good to me. Can we please move forward with this?

Reviewed-by: Andi Kleen <ak@linux.intel.com>

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [BUGFIX][PATCH 1/2] x86, mem: separate x86_64 vmalloc_sync_all() into separate functions
@ 2010-08-25  7:45   ` Andi Kleen
  0 siblings, 0 replies; 14+ messages in thread
From: Andi Kleen @ 2010-08-25  7:45 UTC (permalink / raw)
  To: Haicheng Li
  Cc: H. Peter Anvin, Ingo Molnar, Thomas Gleixner, akpm, ak,
	Wu Fengguang, linux-kernel, linux-mm

Haicheng Li <haicheng.li@linux.intel.com> writes:

> hello,
>
> Resend these two patches for bug fixing:
>
> The bug is that when memory hotplug-adding happens for a large enough area that a new PGD entry is
> needed for the direct mapping, the PGDs of other processes would not get updated. This leads to some
> CPUs oopsing when they have to access the unmapped areas, e.g. onlining CPUs on the new added node.

The patches look good to me. Can we please move forward with this?

Reviewed-by: Andi Kleen <ak@linux.intel.com>

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [BUGFIX][PATCH 1/2] x86, mem: separate x86_64 vmalloc_sync_all() into separate functions
  2010-08-25  7:45   ` Andi Kleen
@ 2010-08-25 19:14     ` H. Peter Anvin
  -1 siblings, 0 replies; 14+ messages in thread
From: H. Peter Anvin @ 2010-08-25 19:14 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Haicheng Li, Ingo Molnar, Thomas Gleixner, akpm, ak,
	Wu Fengguang, linux-kernel, linux-mm

On 08/25/2010 12:45 AM, Andi Kleen wrote:
> Haicheng Li <haicheng.li@linux.intel.com> writes:
> 
>> hello,
>>
>> Resend these two patches for bug fixing:
>>
>> The bug is that when memory hotplug-adding happens for a large enough area that a new PGD entry is
>> needed for the direct mapping, the PGDs of other processes would not get updated. This leads to some
>> CPUs oopsing when they have to access the unmapped areas, e.g. onlining CPUs on the new added node.
> 
> The patches look good to me. Can we please move forward with this?
> 
> Reviewed-by: Andi Kleen <ak@linux.intel.com>
> 

The patches are mangled so they don't apply even with patch -l --
Haicheng, could you send me another copy, as an attachment if necessary?

	-hpa

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [BUGFIX][PATCH 1/2] x86, mem: separate x86_64 vmalloc_sync_all() into separate functions
@ 2010-08-25 19:14     ` H. Peter Anvin
  0 siblings, 0 replies; 14+ messages in thread
From: H. Peter Anvin @ 2010-08-25 19:14 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Haicheng Li, Ingo Molnar, Thomas Gleixner, akpm, ak,
	Wu Fengguang, linux-kernel, linux-mm

On 08/25/2010 12:45 AM, Andi Kleen wrote:
> Haicheng Li <haicheng.li@linux.intel.com> writes:
> 
>> hello,
>>
>> Resend these two patches for bug fixing:
>>
>> The bug is that when memory hotplug-adding happens for a large enough area that a new PGD entry is
>> needed for the direct mapping, the PGDs of other processes would not get updated. This leads to some
>> CPUs oopsing when they have to access the unmapped areas, e.g. onlining CPUs on the new added node.
> 
> The patches look good to me. Can we please move forward with this?
> 
> Reviewed-by: Andi Kleen <ak@linux.intel.com>
> 

The patches are mangled so they don't apply even with patch -l --
Haicheng, could you send me another copy, as an attachment if necessary?

	-hpa

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [BUGFIX][PATCH 1/2] x86, mem: separate x86_64 vmalloc_sync_all() into separate functions
  2010-08-25 19:14     ` H. Peter Anvin
@ 2010-08-25 21:36       ` H. Peter Anvin
  -1 siblings, 0 replies; 14+ messages in thread
From: H. Peter Anvin @ 2010-08-25 21:36 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Haicheng Li, Ingo Molnar, Thomas Gleixner, akpm, ak,
	Wu Fengguang, linux-kernel, linux-mm

On 08/25/2010 12:14 PM, H. Peter Anvin wrote:
> 
> The patches are mangled so they don't apply even with patch -l --
> Haicheng, could you send me another copy, as an attachment if necessary?
> 

Never mind, I fixed them up by hand.

	-hpa

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [BUGFIX][PATCH 1/2] x86, mem: separate x86_64 vmalloc_sync_all() into separate functions
@ 2010-08-25 21:36       ` H. Peter Anvin
  0 siblings, 0 replies; 14+ messages in thread
From: H. Peter Anvin @ 2010-08-25 21:36 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Haicheng Li, Ingo Molnar, Thomas Gleixner, akpm, ak,
	Wu Fengguang, linux-kernel, linux-mm

On 08/25/2010 12:14 PM, H. Peter Anvin wrote:
> 
> The patches are mangled so they don't apply even with patch -l --
> Haicheng, could you send me another copy, as an attachment if necessary?
> 

Never mind, I fixed them up by hand.

	-hpa

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [tip:x86/mm] x86, mm: Separate x86_64 vmalloc_sync_all() into separate functions
  2010-08-20  9:45 ` Haicheng Li
                   ` (2 preceding siblings ...)
  (?)
@ 2010-08-25 21:37 ` tip-bot for Haicheng Li
  -1 siblings, 0 replies; 14+ messages in thread
From: tip-bot for Haicheng Li @ 2010-08-25 21:37 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, haicheng.li, hpa, mingo, ak, tglx, fengguang.wu, hpa

Commit-ID:  7b61b1ccb1a84c3391360ac94c908bfbbb35299c
Gitweb:     http://git.kernel.org/tip/7b61b1ccb1a84c3391360ac94c908bfbbb35299c
Author:     Haicheng Li <haicheng.li@linux.intel.com>
AuthorDate: Wed, 19 May 2010 17:42:14 +0800
Committer:  H. Peter Anvin <hpa@linux.intel.com>
CommitDate: Wed, 25 Aug 2010 13:47:14 -0700

x86, mm: Separate x86_64 vmalloc_sync_all() into separate functions

No behavior change.

Move some of vmalloc_sync_all() code into a new function
sync_global_pgds() that will be useful for memory hotplug.

Signed-off-by: Haicheng Li <haicheng.li@linux.intel.com>
LKML-Reference: <4C6E4ECD.1090607@linux.intel.com>
Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [tip:x86/mm] x86-64, mem: Update all PGDs for direct mapping and vmemmap mapping changes
  2010-08-20  9:50   ` Haicheng Li
  (?)
@ 2010-08-25 21:37   ` tip-bot for Haicheng Li
  -1 siblings, 0 replies; 14+ messages in thread
From: tip-bot for Haicheng Li @ 2010-08-25 21:37 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, haicheng.li, hpa, mingo, ak, tglx, fengguang.wu, hpa

Commit-ID:  f4a9f26dd0041423da94bb22dfed742540915487
Gitweb:     http://git.kernel.org/tip/f4a9f26dd0041423da94bb22dfed742540915487
Author:     Haicheng Li <haicheng.li@linux.intel.com>
AuthorDate: Fri, 20 Aug 2010 17:50:16 +0800
Committer:  H. Peter Anvin <hpa@linux.intel.com>
CommitDate: Wed, 25 Aug 2010 14:01:03 -0700

x86-64, mem: Update all PGDs for direct mapping and vmemmap mapping changes

When memory hotplug-adding happens for a large enough area
that a new PGD entry is needed for the direct mapping, the PGDs
of other processes would not get updated. This leads to some CPUs
oopsing like below when they have to access the unmapped areas.

[ 1139.243192] BUG: soft lockup - CPU#0 stuck for 61s! [bash:6534]
[ 1139.243195] Modules linked in: ipv6 autofs4 rfcomm l2cap crc16 bluetooth rfkill binfmt_misc
dm_mirror dm_region_hash dm_log dm_multipath dm_mod video output sbs sbshc fan battery ac parport_pc
lp parport joydev usbhid processor thermal thermal_sys container button rtc_cmos rtc_core rtc_lib
i2c_i801 i2c_core pcspkr uhci_hcd ohci_hcd ehci_hcd usbcore
[ 1139.243229] irq event stamp: 8538759
[ 1139.243230] hardirqs last  enabled at (8538759): [<ffffffff8100c3fc>] restore_args+0x0/0x30
[ 1139.243236] hardirqs last disabled at (8538757): [<ffffffff810422df>] __do_softirq+0x106/0x146
[ 1139.243240] softirqs last  enabled at (8538758): [<ffffffff81042310>] __do_softirq+0x137/0x146
[ 1139.243245] softirqs last disabled at (8538743): [<ffffffff8100cb5c>] call_softirq+0x1c/0x34
[ 1139.243249] CPU 0:
[ 1139.243250] Modules linked in: ipv6 autofs4 rfcomm l2cap crc16 bluetooth rfkill binfmt_misc
dm_mirror dm_region_hash dm_log dm_multipath dm_mod video output sbs sbshc fan battery ac parport_pc
lp parport joydev usbhid processor thermal thermal_sys container button rtc_cmos rtc_core rtc_lib
i2c_i801 i2c_core pcspkr uhci_hcd ohci_hcd ehci_hcd usbcore
[ 1139.243284] Pid: 6534, comm: bash Tainted: G   M       2.6.32-haicheng-cpuhp #7 QSSC-S4R
[ 1139.243287] RIP: 0010:[<ffffffff810ace35>]  [<ffffffff810ace35>] alloc_arraycache+0x35/0x69
[ 1139.243292] RSP: 0018:ffff8802799f9d78  EFLAGS: 00010286
[ 1139.243295] RAX: ffff8884ffc00000 RBX: ffff8802799f9d98 RCX: 0000000000000000
[ 1139.243297] RDX: 0000000000190018 RSI: 0000000000000001 RDI: ffff8884ffc00010
[ 1139.243300] RBP: ffffffff8100c34e R08: 0000000000000002 R09: 0000000000000000
[ 1139.243303] R10: ffffffff8246dda0 R11: 000000d08246dda0 R12: ffff8802599bfff0
[ 1139.243305] R13: ffff88027904c040 R14: ffff8802799f8000 R15: 0000000000000001
[ 1139.243308] FS:  00007fe81bfe86e0(0000) GS:ffff88000d800000(0000) knlGS:0000000000000000
[ 1139.243311] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1139.243313] CR2: ffff8884ffc00000 CR3: 000000026cf2d000 CR4: 00000000000006f0
[ 1139.243316] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1139.243318] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 1139.243321] Call Trace:
[ 1139.243324]  [<ffffffff810ace29>] ? alloc_arraycache+0x29/0x69
[ 1139.243328]  [<ffffffff8135004e>] ? cpuup_callback+0x1b0/0x32a
[ 1139.243333]  [<ffffffff8105385d>] ? notifier_call_chain+0x33/0x5b
[ 1139.243337]  [<ffffffff810538a4>] ? __raw_notifier_call_chain+0x9/0xb
[ 1139.243340]  [<ffffffff8134ecfc>] ? cpu_up+0xb3/0x152
[ 1139.243344]  [<ffffffff813388ce>] ? store_online+0x4d/0x75
[ 1139.243348]  [<ffffffff811e53f3>] ? sysdev_store+0x1b/0x1d
[ 1139.243351]  [<ffffffff8110589f>] ? sysfs_write_file+0xe5/0x121
[ 1139.243355]  [<ffffffff810b539d>] ? vfs_write+0xae/0x14a
[ 1139.243358]  [<ffffffff810b587f>] ? sys_write+0x47/0x6f
[ 1139.243362]  [<ffffffff8100b9ab>] ? system_call_fastpath+0x16/0x1b

This patch makes sure to always replicate new direct mapping PGD entries
to the PGDs of all processes, as well as ensures corresponding vmemmap
mapping gets synced.

V1: initial code by Andi Kleen.
V2: fix several issues found in testing.
V3: as suggested by Wu Fengguang, reuse common code of vmalloc_sync_all().

[ hpa: changed pgd_changed from int to bool ]

Originally-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Haicheng Li <haicheng.li@linux.intel.com>
LKML-Reference: <4C6E4FD8.6080100@linux.intel.com>
Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/mm/init_64.c |    9 ++++++++-
 1 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9a66746..de852ad 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -534,8 +534,9 @@ kernel_physical_mapping_init(unsigned long start,
 			     unsigned long end,
 			     unsigned long page_size_mask)
 {
-
+	bool pgd_changed = false;
 	unsigned long next, last_map_addr = end;
+	unsigned long addr;
 
 	start = (unsigned long)__va(start);
 	end = (unsigned long)__va(end);
@@ -563,7 +564,12 @@ kernel_physical_mapping_init(unsigned long start,
 		spin_lock(&init_mm.page_table_lock);
 		pgd_populate(&init_mm, pgd, __va(pud_phys));
 		spin_unlock(&init_mm.page_table_lock);
+		pgd_changed = true;
 	}
+
+	if (pgd_changed)
+		sync_global_pgds(addr, end);
+
 	__flush_tlb_all();
 
 	return last_map_addr;
@@ -1003,6 +1009,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 		}
 
 	}
+	sync_global_pgds((unsigned long)start_page, end);
 	return 0;
 }
 

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [tip:x86/mm] x86, mm: Separate x86_64 vmalloc_sync_all() into separate functions
  2010-08-20  9:45 ` Haicheng Li
                   ` (3 preceding siblings ...)
  (?)
@ 2010-08-26 21:33 ` tip-bot for Haicheng Li
  -1 siblings, 0 replies; 14+ messages in thread
From: tip-bot for Haicheng Li @ 2010-08-26 21:33 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, haicheng.li, hpa, mingo, ak, tglx, fengguang.wu, hpa

Commit-ID:  6afb5157b9eba4092e2f0f54d24a3806409bdde5
Gitweb:     http://git.kernel.org/tip/6afb5157b9eba4092e2f0f54d24a3806409bdde5
Author:     Haicheng Li <haicheng.li@linux.intel.com>
AuthorDate: Wed, 19 May 2010 17:42:14 +0800
Committer:  H. Peter Anvin <hpa@linux.intel.com>
CommitDate: Thu, 26 Aug 2010 14:02:29 -0700

x86, mm: Separate x86_64 vmalloc_sync_all() into separate functions

No behavior change.

Move some of vmalloc_sync_all() code into a new function
sync_global_pgds() that will be useful for memory hotplug.

Signed-off-by: Haicheng Li <haicheng.li@linux.intel.com>
LKML-Reference: <4C6E4ECD.1090607@linux.intel.com>
Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/pgtable_64.h |    2 ++
 arch/x86/mm/fault.c               |   24 +-----------------------
 arch/x86/mm/init_64.c             |   30 ++++++++++++++++++++++++++++++
 3 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 076052c..f96ac9b 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -102,6 +102,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
 	native_set_pgd(pgd, native_make_pgd(0));
 }
 
+extern void sync_global_pgds(unsigned long start, unsigned long end);
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4c4508e..51f7ee7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -326,29 +326,7 @@ out:
 
 void vmalloc_sync_all(void)
 {
-	unsigned long address;
-
-	for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
-	     address += PGDIR_SIZE) {
-
-		const pgd_t *pgd_ref = pgd_offset_k(address);
-		unsigned long flags;
-		struct page *page;
-
-		if (pgd_none(*pgd_ref))
-			continue;
-
-		spin_lock_irqsave(&pgd_lock, flags);
-		list_for_each_entry(page, &pgd_list, lru) {
-			pgd_t *pgd;
-			pgd = (pgd_t *)page_address(page) + pgd_index(address);
-			if (pgd_none(*pgd))
-				set_pgd(pgd, *pgd_ref);
-			else
-				BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-		}
-		spin_unlock_irqrestore(&pgd_lock, flags);
-	}
+	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
 }
 
 /*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9a66746..61a1b4f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -98,6 +98,36 @@ static int __init nonx32_setup(char *str)
 __setup("noexec32=", nonx32_setup);
 
 /*
+ * When memory was added/removed make sure all the processes MM have
+ * suitable PGD entries in the local PGD level page.
+ */
+void sync_global_pgds(unsigned long start, unsigned long end)
+{
+       unsigned long address;
+
+       for (address = start; address <= end; address += PGDIR_SIZE) {
+	       const pgd_t *pgd_ref = pgd_offset_k(address);
+	       unsigned long flags;
+	       struct page *page;
+
+	       if (pgd_none(*pgd_ref))
+		       continue;
+
+	       spin_lock_irqsave(&pgd_lock, flags);
+	       list_for_each_entry(page, &pgd_list, lru) {
+		       pgd_t *pgd;
+		       pgd = (pgd_t *)page_address(page) + pgd_index(address);
+		       if (pgd_none(*pgd))
+			       set_pgd(pgd, *pgd_ref);
+		       else
+			       BUG_ON(pgd_page_vaddr(*pgd)
+					!= pgd_page_vaddr(*pgd_ref));
+	       }
+	       spin_unlock_irqrestore(&pgd_lock, flags);
+       }
+}
+
+/*
  * NOTE: This function is marked __ref because it calls __init function
  * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
  */

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [tip:x86/mm] x86-64, mem: Update all PGDs for direct mapping and vmemmap mapping changes
  2010-08-20  9:50   ` Haicheng Li
  (?)
  (?)
@ 2010-08-26 21:34   ` tip-bot for Haicheng Li
  -1 siblings, 0 replies; 14+ messages in thread
From: tip-bot for Haicheng Li @ 2010-08-26 21:34 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, haicheng.li, hpa, mingo, ak, tglx, fengguang.wu, hpa

Commit-ID:  9b861528a8012e7bc4d1f7bae07395b225331477
Gitweb:     http://git.kernel.org/tip/9b861528a8012e7bc4d1f7bae07395b225331477
Author:     Haicheng Li <haicheng.li@linux.intel.com>
AuthorDate: Fri, 20 Aug 2010 17:50:16 +0800
Committer:  H. Peter Anvin <hpa@linux.intel.com>
CommitDate: Thu, 26 Aug 2010 14:02:33 -0700

x86-64, mem: Update all PGDs for direct mapping and vmemmap mapping changes

When memory hotplug-adding happens for a large enough area
that a new PGD entry is needed for the direct mapping, the PGDs
of other processes would not get updated. This leads to some CPUs
oopsing like below when they have to access the unmapped areas.

[ 1139.243192] BUG: soft lockup - CPU#0 stuck for 61s! [bash:6534]
[ 1139.243195] Modules linked in: ipv6 autofs4 rfcomm l2cap crc16 bluetooth rfkill binfmt_misc
dm_mirror dm_region_hash dm_log dm_multipath dm_mod video output sbs sbshc fan battery ac parport_pc
lp parport joydev usbhid processor thermal thermal_sys container button rtc_cmos rtc_core rtc_lib
i2c_i801 i2c_core pcspkr uhci_hcd ohci_hcd ehci_hcd usbcore
[ 1139.243229] irq event stamp: 8538759
[ 1139.243230] hardirqs last  enabled at (8538759): [<ffffffff8100c3fc>] restore_args+0x0/0x30
[ 1139.243236] hardirqs last disabled at (8538757): [<ffffffff810422df>] __do_softirq+0x106/0x146
[ 1139.243240] softirqs last  enabled at (8538758): [<ffffffff81042310>] __do_softirq+0x137/0x146
[ 1139.243245] softirqs last disabled at (8538743): [<ffffffff8100cb5c>] call_softirq+0x1c/0x34
[ 1139.243249] CPU 0:
[ 1139.243250] Modules linked in: ipv6 autofs4 rfcomm l2cap crc16 bluetooth rfkill binfmt_misc
dm_mirror dm_region_hash dm_log dm_multipath dm_mod video output sbs sbshc fan battery ac parport_pc
lp parport joydev usbhid processor thermal thermal_sys container button rtc_cmos rtc_core rtc_lib
i2c_i801 i2c_core pcspkr uhci_hcd ohci_hcd ehci_hcd usbcore
[ 1139.243284] Pid: 6534, comm: bash Tainted: G   M       2.6.32-haicheng-cpuhp #7 QSSC-S4R
[ 1139.243287] RIP: 0010:[<ffffffff810ace35>]  [<ffffffff810ace35>] alloc_arraycache+0x35/0x69
[ 1139.243292] RSP: 0018:ffff8802799f9d78  EFLAGS: 00010286
[ 1139.243295] RAX: ffff8884ffc00000 RBX: ffff8802799f9d98 RCX: 0000000000000000
[ 1139.243297] RDX: 0000000000190018 RSI: 0000000000000001 RDI: ffff8884ffc00010
[ 1139.243300] RBP: ffffffff8100c34e R08: 0000000000000002 R09: 0000000000000000
[ 1139.243303] R10: ffffffff8246dda0 R11: 000000d08246dda0 R12: ffff8802599bfff0
[ 1139.243305] R13: ffff88027904c040 R14: ffff8802799f8000 R15: 0000000000000001
[ 1139.243308] FS:  00007fe81bfe86e0(0000) GS:ffff88000d800000(0000) knlGS:0000000000000000
[ 1139.243311] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1139.243313] CR2: ffff8884ffc00000 CR3: 000000026cf2d000 CR4: 00000000000006f0
[ 1139.243316] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1139.243318] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 1139.243321] Call Trace:
[ 1139.243324]  [<ffffffff810ace29>] ? alloc_arraycache+0x29/0x69
[ 1139.243328]  [<ffffffff8135004e>] ? cpuup_callback+0x1b0/0x32a
[ 1139.243333]  [<ffffffff8105385d>] ? notifier_call_chain+0x33/0x5b
[ 1139.243337]  [<ffffffff810538a4>] ? __raw_notifier_call_chain+0x9/0xb
[ 1139.243340]  [<ffffffff8134ecfc>] ? cpu_up+0xb3/0x152
[ 1139.243344]  [<ffffffff813388ce>] ? store_online+0x4d/0x75
[ 1139.243348]  [<ffffffff811e53f3>] ? sysdev_store+0x1b/0x1d
[ 1139.243351]  [<ffffffff8110589f>] ? sysfs_write_file+0xe5/0x121
[ 1139.243355]  [<ffffffff810b539d>] ? vfs_write+0xae/0x14a
[ 1139.243358]  [<ffffffff810b587f>] ? sys_write+0x47/0x6f
[ 1139.243362]  [<ffffffff8100b9ab>] ? system_call_fastpath+0x16/0x1b

This patch makes sure that new direct mapping PGD entries are always
replicated to the PGDs of all processes, and ensures that the
corresponding vmemmap mapping gets synced as well.

V1: initial code by Andi Kleen.
V2: fix several issues found in testing.
V3: as suggested by Wu Fengguang, reuse common code of vmalloc_sync_all().

[ hpa: changed pgd_changed from int to bool ]

Originally-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Haicheng Li <haicheng.li@linux.intel.com>
LKML-Reference: <4C6E4FD8.6080100@linux.intel.com>
Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/mm/init_64.c |    9 ++++++++-
 1 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 61a1b4f..64e7bc2 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -564,8 +564,9 @@ kernel_physical_mapping_init(unsigned long start,
 			     unsigned long end,
 			     unsigned long page_size_mask)
 {
-
+	bool pgd_changed = false;
 	unsigned long next, last_map_addr = end;
+	unsigned long addr;
 
 	start = (unsigned long)__va(start);
 	end = (unsigned long)__va(end);
@@ -593,7 +594,12 @@ kernel_physical_mapping_init(unsigned long start,
 		spin_lock(&init_mm.page_table_lock);
 		pgd_populate(&init_mm, pgd, __va(pud_phys));
 		spin_unlock(&init_mm.page_table_lock);
+		pgd_changed = true;
 	}
+
+	if (pgd_changed)
+		sync_global_pgds(addr, end);
+
 	__flush_tlb_all();
 
 	return last_map_addr;
@@ -1033,6 +1039,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 		}
 
 	}
+	sync_global_pgds((unsigned long)start_page, end);
 	return 0;
 }
 

^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2010-08-26 21:36 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-08-20  9:45 [BUGFIX][PATCH 1/2] x86, mem: separate x86_64 vmalloc_sync_all() into separate functions Haicheng Li
2010-08-20  9:45 ` Haicheng Li
2010-08-20  9:50 ` [BUGFIX][PATCH 2/2] x86, mem: update all PGDs for direct mapping and vmemmap mapping changes on 64bit Haicheng Li
2010-08-20  9:50   ` Haicheng Li
2010-08-25 21:37   ` [tip:x86/mm] x86-64, mem: Update all PGDs for direct mapping and vmemmap mapping changes tip-bot for Haicheng Li
2010-08-26 21:34   ` tip-bot for Haicheng Li
2010-08-25  7:45 ` [BUGFIX][PATCH 1/2] x86, mem: separate x86_64 vmalloc_sync_all() into separate functions Andi Kleen
2010-08-25  7:45   ` Andi Kleen
2010-08-25 19:14   ` H. Peter Anvin
2010-08-25 19:14     ` H. Peter Anvin
2010-08-25 21:36     ` H. Peter Anvin
2010-08-25 21:36       ` H. Peter Anvin
2010-08-25 21:37 ` [tip:x86/mm] x86, mm: Separate " tip-bot for Haicheng Li
2010-08-26 21:33 ` tip-bot for Haicheng Li

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.