* [PATCH v2] x86, espfix: init espfix on the boot cpu side
@ 2015-06-26  9:33 Zhu Guihua
  2015-06-27  8:55 ` Borislav Petkov
  0 siblings, 1 reply; 3+ messages in thread
From: Zhu Guihua @ 2015-06-26  9:33 UTC (permalink / raw)
  To: linux-kernel; +Cc: mingo, hpa, luto, luto, bp, tglx, x86, Zhu Guihua

The following lockdep warning occurs when running with the latest kernel:
[    3.178000] ------------[ cut here ]------------
[    3.183000] WARNING: CPU: 128 PID: 0 at kernel/locking/lockdep.c:2755 lockdep_trace_alloc+0xdd/0xe0()
[    3.193000] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))
[    3.199000] Modules linked in:

[    3.203000] CPU: 128 PID: 0 Comm: swapper/128 Not tainted 4.1.0-rc3 #70
[    3.221000]  0000000000000000 2d6601fb3e6d4e4c ffff88086fd5fc38 ffffffff81773f0a
[    3.230000]  0000000000000000 ffff88086fd5fc90 ffff88086fd5fc78 ffffffff8108c85a
[    3.238000]  ffff88086fd60000 0000000000000092 ffff88086fd60000 00000000000000d0
[    3.246000] Call Trace:
[    3.249000]  [<ffffffff81773f0a>] dump_stack+0x4c/0x65
[    3.255000]  [<ffffffff8108c85a>] warn_slowpath_common+0x8a/0xc0
[    3.261000]  [<ffffffff8108c8e5>] warn_slowpath_fmt+0x55/0x70
[    3.268000]  [<ffffffff810ee24d>] lockdep_trace_alloc+0xdd/0xe0
[    3.274000]  [<ffffffff811cda0d>] __alloc_pages_nodemask+0xad/0xca0
[    3.281000]  [<ffffffff810ec7ad>] ? __lock_acquire+0xf6d/0x1560
[    3.288000]  [<ffffffff81219c8a>] alloc_page_interleave+0x3a/0x90
[    3.295000]  [<ffffffff8121b32d>] alloc_pages_current+0x17d/0x1a0
[    3.301000]  [<ffffffff811c869e>] ? __get_free_pages+0xe/0x50
[    3.308000]  [<ffffffff811c869e>] __get_free_pages+0xe/0x50
[    3.314000]  [<ffffffff8102640b>] init_espfix_ap+0x17b/0x320
[    3.320000]  [<ffffffff8105c691>] start_secondary+0xf1/0x1f0
[    3.327000] ---[ end trace 1b3327d9d6a1d62c ]---

init_espfix_ap() allocates pages with GFP_KERNEL, but it is called on the
secondary CPU before local interrupts are enabled. The lockdep subsystem
treats this as a GFP_FS allocation with local interrupts disabled and
triggers the warning shown above.
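
The condition lockdep complains about is roughly the following (a simplified
sketch of the check in kernel/locking/lockdep.c, not the literal code):

  /* GFP_KERNEL implies __GFP_FS, and local interrupts are still
   * disabled this early in start_secondary(), so the check fires.
   */
  if (gfp_mask & __GFP_FS)
          DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags));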

So allocate the espfix stack pages on the boot CPU side instead, while the
target CPU is being brought up by the primary CPU, and hand them over to the
secondary CPU.
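
Roughly, the ordering change is the following (a pared-down call-flow sketch,
not compilable code; the real change is in the diff below):

  /* Before: the AP sets up its own espfix area from start_secondary(),
   * with local interrupts still disabled, so the GFP_KERNEL allocation
   * trips lockdep.
   */
  start_secondary()
          init_espfix_ap()        /* __get_free_page(GFP_KERNEL) */

  /* After: the boot CPU allocates the pages in do_boot_cpu() before
   * waking the AP (interrupts enabled, sleeping allowed) and hands them
   * over via the target CPU's per-cpu espfix_stack/espfix_waddr.
   */
  do_boot_cpu(apicid, cpu, idle)
          init_espfix_ap(cpu)     /* alloc_pages_node(cpu_to_node(cpu), ...) */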

Signed-off-by: Zhu Guihua <zhugh.fnst@cn.fujitsu.com>
---
v2:
 -allocate espfix stack pages when the target CPU is being brought up by the
  primary CPU
 -reworded the commit message
v1:
 -Alloc the page on the node the target CPU is on.
RFC v2:
 -Let the boot-up routine init the espfix stack for the target CPU after it
  has booted.
---
 arch/x86/include/asm/espfix.h |  2 +-
 arch/x86/kernel/espfix_64.c   | 28 ++++++++++++++++------------
 arch/x86/kernel/smpboot.c     | 14 +++++++-------
 3 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
index 99efebb..ca3ce9a 100644
--- a/arch/x86/include/asm/espfix.h
+++ b/arch/x86/include/asm/espfix.h
@@ -9,7 +9,7 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
 DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
 
 extern void init_espfix_bsp(void);
-extern void init_espfix_ap(void);
+extern void init_espfix_ap(int cpu);
 
 #endif /* CONFIG_X86_64 */
 
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index f5d0730..f6dd9df 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -131,25 +131,24 @@ void __init init_espfix_bsp(void)
 	init_espfix_random();
 
 	/* The rest is the same as for any other processor */
-	init_espfix_ap();
+	init_espfix_ap(0);
 }
 
-void init_espfix_ap(void)
+void init_espfix_ap(int cpu)
 {
-	unsigned int cpu, page;
+	unsigned int page;
 	unsigned long addr;
 	pud_t pud, *pud_p;
 	pmd_t pmd, *pmd_p;
 	pte_t pte, *pte_p;
-	int n;
+	int n, node;
 	void *stack_page;
 	pteval_t ptemask;
 
 	/* We only have to do this once... */
-	if (likely(this_cpu_read(espfix_stack)))
+	if (likely(per_cpu(espfix_stack, cpu)))
 		return;		/* Already initialized */
 
-	cpu = smp_processor_id();
 	addr = espfix_base_addr(cpu);
 	page = cpu/ESPFIX_STACKS_PER_PAGE;
 
@@ -165,12 +164,15 @@ void init_espfix_ap(void)
 	if (stack_page)
 		goto unlock_done;
 
+	node = cpu_to_node(cpu);
 	ptemask = __supported_pte_mask;
 
 	pud_p = &espfix_pud_page[pud_index(addr)];
 	pud = *pud_p;
 	if (!pud_present(pud)) {
-		pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
+		struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+		pmd_p = (pmd_t *)page_address(page);
 		pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
 		paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PUD_CLONES; n++)
@@ -180,7 +182,9 @@ void init_espfix_ap(void)
 	pmd_p = pmd_offset(&pud, addr);
 	pmd = *pmd_p;
 	if (!pmd_present(pmd)) {
-		pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
+		struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+		pte_p = (pte_t *)page_address(page);
 		pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
 		paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PMD_CLONES; n++)
@@ -188,7 +192,7 @@ void init_espfix_ap(void)
 	}
 
 	pte_p = pte_offset_kernel(&pmd, addr);
-	stack_page = (void *)__get_free_page(GFP_KERNEL);
+	stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
 	pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
 	for (n = 0; n < ESPFIX_PTE_CLONES; n++)
 		set_pte(&pte_p[n*PTE_STRIDE], pte);
@@ -199,7 +203,7 @@ void init_espfix_ap(void)
 unlock_done:
 	mutex_unlock(&espfix_init_mutex);
 done:
-	this_cpu_write(espfix_stack, addr);
-	this_cpu_write(espfix_waddr, (unsigned long)stack_page
-		       + (addr & ~PAGE_MASK));
+	per_cpu(espfix_stack, cpu) = addr;
+	per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page
+		                     + (addr & ~PAGE_MASK);
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8add66b..6ffaa3a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -239,13 +239,6 @@ static void notrace start_secondary(void *unused)
 	check_tsc_sync_target();
 
 	/*
-	 * Enable the espfix hack for this CPU
-	 */
-#ifdef CONFIG_X86_ESPFIX64
-	init_espfix_ap();
-#endif
-
-	/*
 	 * We need to hold vector_lock so there the set of online cpus
 	 * does not change while we are assigning vectors to cpus.  Holding
 	 * this lock ensures we don't half assign or remove an irq from a cpu.
@@ -854,6 +847,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	initial_code = (unsigned long)start_secondary;
 	stack_start  = idle->thread.sp;
 
+	/*
+	 * Enable the espfix hack for this CPU
+	 */
+#ifdef CONFIG_X86_ESPFIX64
+	init_espfix_ap(cpu);
+#endif
+
 	/* So we see what's up */
 	announce_cpu(cpu, apicid);
 
-- 
1.9.3



* Re: [PATCH v2] x86, espfix: init espfix on the boot cpu side
  2015-06-26  9:33 [PATCH v2] x86, espfix: init espfix on the boot cpu side Zhu Guihua
@ 2015-06-27  8:55 ` Borislav Petkov
  2015-06-29  6:48   ` Ingo Molnar
  0 siblings, 1 reply; 3+ messages in thread
From: Borislav Petkov @ 2015-06-27  8:55 UTC (permalink / raw)
  To: Zhu Guihua, hpa; +Cc: linux-kernel, mingo, luto, luto, tglx, x86

On Fri, Jun 26, 2015 at 05:33:22PM +0800, Zhu Guihua wrote:
> The following lockdep warning occurs when running with the latest kernel:
>
> [...]

Looks ok to me and it works on the 16-node NUMA guest I was triggering the splat
with.

hpa, is that what you had in mind?

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--


* Re: [PATCH v2] x86, espfix: init espfix on the boot cpu side
  2015-06-27  8:55 ` Borislav Petkov
@ 2015-06-29  6:48   ` Ingo Molnar
  0 siblings, 0 replies; 3+ messages in thread
From: Ingo Molnar @ 2015-06-29  6:48 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Zhu Guihua, hpa, linux-kernel, luto, luto, tglx, x86


* Borislav Petkov <bp@alien8.de> wrote:

> On Fri, Jun 26, 2015 at 05:33:22PM +0800, Zhu Guihua wrote:
> > The following lockdep warning occurs when running with the latest kernel:
> >
> > [...]
> 
> Looks ok to me and it works on the 16-node NUMA guest I was triggering the splat
> with.
> 
> hpa, is that what you had in mind?

Looks good to me, but please split it into two parts: one that pushes down the CPU
index, and another that does the actual change.

Should this break anything, having it in two parts will make it much easier to
bisect.

Thanks,

	Ingo


