* [PATCH 00 of 55] xen64: implement 64-bit Xen support
@ 2008-07-08 22:06 Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 01 of 55] x86/paravirt: Call paravirt_pagetable_setup_{start, done} Jeremy Fitzhardinge
                   ` (55 more replies)
  0 siblings, 56 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Hi Ingo,

Here's the set of patches to implement 64-bit Xen support.

The first part of the series is some more (fairly minor) x86 arch
updates which were missed in the previous series.

Following that are the Xen-specific changes to implement 64-bit
support.  It works fairly well, but I know of a couple of bugs:

 - 32-bit emulation doesn't work properly yet.  Something goes wrong
   with %gs, and a userspace %gs: reference segfaults.

 - It crashes when bringing up secondary CPUs under some config
   combinations.  I think it isn't quite setting up all the CPU sibling
   topology state for the various scheduler policies.  It's trying to
   set up the simplest arrangement (every CPU is a singleton with no
   shared cache or anything else), which was quite tricky to arrange;
   a rough sketch of that arrangement is below.
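
For illustration, here is a hypothetical sketch of that singleton
arrangement (not code from this series; the function name is invented
and the cpumask helpers are assumed to match this era's API):

static void __cpuinit set_singleton_topology(int cpu)
{
	/* Every CPU is its own sibling and core group: no shared
	 * threads, cores or caches are reported to the scheduler. */
	cpus_clear(per_cpu(cpu_sibling_map, cpu));
	cpus_clear(per_cpu(cpu_core_map, cpu));
	cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
	cpu_set(cpu, per_cpu(cpu_core_map, cpu));
}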

I hope to have followup patches to address both of these in the next
couple of days.  I expect both fixes will be small.

Thanks,
	  J



* [PATCH 01 of 55] x86/paravirt: Call paravirt_pagetable_setup_{start, done}
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 02 of 55] pvops-64: call paravirt_post_allocator_init() on setup_arch() Jeremy Fitzhardinge
                   ` (54 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

From: Eduardo Habkost <ehabkost@redhat.com>

Call paravirt_pagetable_setup_{start,done}

These paravirt_ops functions were not being called on x86_64.
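
For context, here is a minimal sketch of how a paravirt backend
consumes these hooks (illustrative only: my_pagetable_setup_* and
my_paravirt_setup are invented names, while the pv_mmu_ops members
match the hunk below):

static __init void my_pagetable_setup_start(pgd_t *base)
{
	/* take over the boot pagetable before paging_init() runs */
}

static __init void my_pagetable_setup_done(pgd_t *base)
{
	/* pagetable is fully constructed; safe to pin/register it */
}

static void __init my_paravirt_setup(void)
{
	pv_mmu_ops.pagetable_setup_start = my_pagetable_setup_start;
	pv_mmu_ops.pagetable_setup_done  = my_pagetable_setup_done;
}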

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/kernel/paravirt.c   |    4 ++++
 arch/x86/kernel/setup.c      |    2 ++
 arch/x86/xen/enlighten.c     |    4 ++++
 include/asm-x86/pgtable.h    |   18 ++++++++++++++++++
 include/asm-x86/pgtable_32.h |   15 ---------------
 5 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -29,6 +29,7 @@
 #include <asm/desc.h>
 #include <asm/setup.h>
 #include <asm/arch_hooks.h>
+#include <asm/pgtable.h>
 #include <asm/time.h>
 #include <asm/pgalloc.h>
 #include <asm/irq.h>
@@ -373,6 +374,9 @@
 #ifndef CONFIG_X86_64
 	.pagetable_setup_start = native_pagetable_setup_start,
 	.pagetable_setup_done = native_pagetable_setup_done,
+#else
+	.pagetable_setup_start = paravirt_nop,
+	.pagetable_setup_done = paravirt_nop,
 #endif
 
 	.read_cr2 = native_read_cr2,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -793,7 +793,9 @@
 	vmi_init();
 #endif
 
+	paravirt_pagetable_setup_start(swapper_pg_dir);
 	paging_init();
+	paravirt_pagetable_setup_done(swapper_pg_dir);
 
 #ifdef CONFIG_X86_64
 	map_vsyscall();
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -841,6 +841,7 @@
 
 static __init void xen_pagetable_setup_start(pgd_t *base)
 {
+#ifdef CONFIG_X86_32
 	pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
 	int i;
 
@@ -886,6 +887,7 @@
 	/* Unpin initial Xen pagetable */
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
 			  PFN_DOWN(__pa(xen_start_info->pt_base)));
+#endif	/* CONFIG_X86_32 */
 }
 
 void xen_setup_shared_info(void)
@@ -927,9 +929,11 @@
 
 	xen_setup_shared_info();
 
+#ifdef CONFIG_X86_32
 	/* Actually pin the pagetable down, but we can't set PG_pinned
 	   yet because the page structures don't exist yet. */
 	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
+#endif
 }
 
 static __init void xen_post_allocator_init(void)
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -300,6 +300,14 @@
 /* Install a pte for a particular vaddr in kernel space. */
 void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
+#ifdef CONFIG_X86_32
+extern void native_pagetable_setup_start(pgd_t *base);
+extern void native_pagetable_setup_done(pgd_t *base);
+#else
+static inline void native_pagetable_setup_start(pgd_t *base) {}
+static inline void native_pagetable_setup_done(pgd_t *base) {}
+#endif
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else  /* !CONFIG_PARAVIRT */
@@ -331,6 +339,16 @@
 
 #define pte_update(mm, addr, ptep)              do { } while (0)
 #define pte_update_defer(mm, addr, ptep)        do { } while (0)
+
+static inline void __init paravirt_pagetable_setup_start(pgd_t *base)
+{
+	native_pagetable_setup_start(base);
+}
+
+static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
+{
+	native_pagetable_setup_done(base);
+}
 #endif	/* CONFIG_PARAVIRT */
 
 #endif	/* __ASSEMBLY__ */
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -177,21 +177,6 @@
  */
 #define update_mmu_cache(vma, address, pte) do { } while (0)
 
-extern void native_pagetable_setup_start(pgd_t *base);
-extern void native_pagetable_setup_done(pgd_t *base);
-
-#ifndef CONFIG_PARAVIRT
-static inline void __init paravirt_pagetable_setup_start(pgd_t *base)
-{
-	native_pagetable_setup_start(base);
-}
-
-static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
-{
-	native_pagetable_setup_done(base);
-}
-#endif	/* !CONFIG_PARAVIRT */
-
 #endif /* !__ASSEMBLY__ */
 
 /*




* [PATCH 02 of 55] pvops-64: call paravirt_post_allocator_init() on setup_arch()
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 01 of 55] x86/paravirt: Call paravirt_pagetable_setup_{start, done} Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 03 of 55] x86_64: there's no need to preallocate level1_fixmap_pgt Jeremy Fitzhardinge
                   ` (53 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

From: Eduardo Habkost <ehabkost@redhat.com>

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/kernel/setup.c |    1 +
 arch/x86/mm/init_32.c   |    2 --
 arch/x86/xen/mmu.c      |    8 +++++---
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -796,6 +796,7 @@
 	paravirt_pagetable_setup_start(swapper_pg_dir);
 	paging_init();
 	paravirt_pagetable_setup_done(swapper_pg_dir);
+	paravirt_post_allocator_init();
 
 #ifdef CONFIG_X86_64
 	map_vsyscall();
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -867,8 +867,6 @@
 	 */
 	sparse_init();
 	zone_sizes_init();
-
-	paravirt_post_allocator_init();
 }
 
 /*
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -656,9 +656,11 @@
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
-/* The init_mm pagetable is really pinned as soon as its created, but
-   that's before we have page structures to store the bits.  So do all
-   the book-keeping now. */
+/*
+ * The init_mm pagetable is really pinned as soon as its created, but
+ * that's before we have page structures to store the bits.  So do all
+ * the book-keeping now.
+ */
 static __init int mark_pinned(struct page *page, enum pt_level level)
 {
 	SetPagePinned(page);




* [PATCH 03 of 55] x86_64: there's no need to preallocate level1_fixmap_pgt
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 01 of 55] x86/paravirt: Call paravirt_pagetable_setup_{start, done} Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 02 of 55] pvops-64: call paravirt_post_allocator_init() on setup_arch() Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 04 of 55] x86: clean up formatting of __switch_to Jeremy Fitzhardinge
                   ` (52 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Early fixmap will allocate its own L1 pagetable page for fixmap
mappings, so there's no need to preallocate one.
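
For context, the early fixmap code supplies its own statically
allocated L1 table and wires it in during early boot; a simplified
sketch of that idea (adapted loosely from this era's early_ioremap
code, so treat the details as illustrative):

static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
	__attribute__((aligned(PAGE_SIZE)));

static void __init early_fixmap_pte_init(pmd_t *pmd)
{
	/* point the fixmap's pmd entry at the static L1 table */
	pmd_populate_kernel(&init_mm, pmd, bm_pte);
}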

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/kernel/head_64.S |    6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -362,12 +362,6 @@
 	.quad	level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
 
 NEXT_PAGE(level2_fixmap_pgt)
-	.fill	506,8,0
-	.quad	level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
-	/* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
-	.fill	5,8,0
-
-NEXT_PAGE(level1_fixmap_pgt)
 	.fill	512,8,0
 
 NEXT_PAGE(level2_ident_pgt)




* [PATCH 04 of 55] x86: clean up formatting of __switch_to
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (2 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 03 of 55] x86_64: there's no need to preallocate level1_fixmap_pgt Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 05 of 55] x86: use __page_aligned_data/bss Jeremy Fitzhardinge
                   ` (51 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

process_64.c:__switch_to has some very old, strange formatting, some of
it dating back to pre-git.  Fix it up.

No functional changes.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/kernel/process_64.c |   54 +++++++++++++++++++++---------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -575,8 +575,8 @@
 struct task_struct *
 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
-	struct thread_struct *prev = &prev_p->thread,
-				 *next = &next_p->thread;
+	struct thread_struct *prev = &prev_p->thread;
+	struct thread_struct *next = &next_p->thread;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	unsigned fsindex, gsindex;
@@ -624,35 +624,34 @@
 
 	/* 
 	 * Switch FS and GS.
+	 *
+	 * Segment register != 0 always requires a reload.  Also
+	 * reload when it has changed.  When prev process used 64bit
+	 * base always reload to avoid an information leak.
 	 */
-	{ 
-		/* segment register != 0 always requires a reload. 
-		   also reload when it has changed. 
-		   when prev process used 64bit base always reload
-		   to avoid an information leak. */
-		if (unlikely(fsindex | next->fsindex | prev->fs)) {
-			loadsegment(fs, next->fsindex);
-			/* check if the user used a selector != 0
-	                 * if yes clear 64bit base, since overloaded base
-                         * is always mapped to the Null selector
-                         */
-			if (fsindex)
+	if (unlikely(fsindex | next->fsindex | prev->fs)) {
+		loadsegment(fs, next->fsindex);
+		/* 
+		 * Check if the user used a selector != 0; if yes
+		 *  clear 64bit base, since overloaded base is always
+		 *  mapped to the Null selector
+		 */
+		if (fsindex)
 			prev->fs = 0;				
-		}
-		/* when next process has a 64bit base use it */
-		if (next->fs) 
-			wrmsrl(MSR_FS_BASE, next->fs); 
-		prev->fsindex = fsindex;
+	}
+	/* when next process has a 64bit base use it */
+	if (next->fs)
+		wrmsrl(MSR_FS_BASE, next->fs);
+	prev->fsindex = fsindex;
 
-		if (unlikely(gsindex | next->gsindex | prev->gs)) {
-			load_gs_index(next->gsindex);
-			if (gsindex)
+	if (unlikely(gsindex | next->gsindex | prev->gs)) {
+		load_gs_index(next->gsindex);
+		if (gsindex)
 			prev->gs = 0;				
-		}
-		if (next->gs)
-			wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 
-		prev->gsindex = gsindex;
 	}
+	if (next->gs)
+		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
+	prev->gsindex = gsindex;
 
 	/* Must be after DS reload */
 	unlazy_fpu(prev_p);
@@ -665,7 +664,8 @@
 	write_pda(pcurrent, next_p); 
 
 	write_pda(kernelstack,
-	(unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
+		  (unsigned long)task_stack_page(next_p) +
+		  THREAD_SIZE - PDA_STACKOFFSET);
 #ifdef CONFIG_CC_STACKPROTECTOR
 	/*
 	 * Build time only check to make sure the stack_canary is at




* [PATCH 05 of 55] x86: use __page_aligned_data/bss
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (3 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 04 of 55] x86: clean up formatting of __switch_to Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 06 of 55] x86_64: adjust exception frame in ia32entry Jeremy Fitzhardinge
                   ` (50 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Update arch/x86's use of page-aligned variables.  The change to
arch/x86/xen/mmu.c fixes an actual bug, but the rest are cleanups
that also set a precedent.
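
For reference, the macros expand roughly as follows (paraphrased from
this era's <linux/linkage.h>, so treat the exact spelling as an
assumption):

#define __page_aligned_data	__section(.data.page_aligned) __aligned(PAGE_SIZE)
#define __page_aligned_bss	__section(.bss.page_aligned) __aligned(PAGE_SIZE)

Judging from the hunk below, the actual bug in xen/mmu.c appears to be
the PAGE_ALIGN() applied to an element count when sizing
p2m_top_mfn_list; the conversion removes it.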

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/common_64.c |    4 ++--
 arch/x86/kernel/irq_32.c        |    7 ++-----
 arch/x86/xen/mmu.c              |   15 ++++++---------
 3 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -16,6 +16,7 @@
 #include <asm/i387.h>
 #include <asm/msr.h>
 #include <asm/io.h>
+#include <asm/linkage.h>
 #include <asm/mmu_context.h>
 #include <asm/mtrr.h>
 #include <asm/mce.h>
@@ -512,8 +513,7 @@
 }
 
 char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
-			   DEBUG_STKSZ]
-__attribute__((section(".bss.page_aligned")));
+			   DEBUG_STKSZ] __page_aligned_bss;
 
 extern asmlinkage void ignore_sysret(void);
 
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -83,11 +83,8 @@
 static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
 static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
 
-static char softirq_stack[NR_CPUS * THREAD_SIZE]
-		__attribute__((__section__(".bss.page_aligned")));
-
-static char hardirq_stack[NR_CPUS * THREAD_SIZE]
-		__attribute__((__section__(".bss.page_aligned")));
+static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
 
 static void call_on_stack(void *func, void *stack)
 {
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -46,6 +46,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/paravirt.h>
+#include <asm/linkage.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -60,22 +61,18 @@
 #define TOP_ENTRIES		(MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
 
 /* Placeholder for holes in the address space */
-static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
-	__attribute__((section(".data.page_aligned"))) =
+static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data =
 		{ [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
 
  /* Array of pointers to pages containing p2m entries */
-static unsigned long *p2m_top[TOP_ENTRIES]
-	__attribute__((section(".data.page_aligned"))) =
+static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data =
 		{ [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
 
 /* Arrays of p2m arrays expressed in mfns used for save/restore */
-static unsigned long p2m_top_mfn[TOP_ENTRIES]
-	__attribute__((section(".bss.page_aligned")));
+static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss;
 
-static unsigned long p2m_top_mfn_list[
-			PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
-	__attribute__((section(".bss.page_aligned")));
+static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
+	__page_aligned_bss;
 
 static inline unsigned p2m_top_index(unsigned long pfn)
 {




* [PATCH 06 of 55] x86_64: adjust exception frame in ia32entry
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (4 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 05 of 55] x86: use __page_aligned_data/bss Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 07 of 55] x86_64: unstatic get_local_pda Jeremy Fitzhardinge
                   ` (49 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

The 32-bit compat int $0x80 entrypoint needs exception frame
adjustment.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/ia32/ia32entry.S |    1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -319,6 +319,7 @@
 	/*CFI_REL_OFFSET	rflags,EFLAGS-RIP*/
 	/*CFI_REL_OFFSET	cs,CS-RIP*/
 	CFI_REL_OFFSET	rip,RIP-RIP
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	SWAPGS
 	/*
 	 * No need to follow this irqs on/off section: the syscall




* [PATCH 07 of 55] x86_64: unstatic get_local_pda
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (5 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 06 of 55] x86_64: adjust exception frame in ia32entry Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 08 of 55] xen: print backtrace on multicall failure Jeremy Fitzhardinge
                   ` (48 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

This allows Xen's xen_cpu_up() to allocate a pda for the new CPU.
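
A sketch of the intended call site (the Xen side lands later in this
series, so the shape here is illustrative rather than quoted):

static int __cpuinit xen_cpu_up(unsigned int cpu)
{
	int rc;

#ifdef CONFIG_X86_64
	/* allocate a pda for the new cpu before starting it */
	rc = get_local_pda(cpu);
	if (rc)
		return rc;
#endif
	/* ... bring the cpu up ... */
	return 0;
}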

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/kernel/smpboot.c |    2 +-
 include/asm-x86/smp.h     |    2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -768,7 +768,7 @@
  *
  * Must be called after the _cpu_pda pointer table is initialized.
  */
-static int __cpuinit get_local_pda(int cpu)
+int __cpuinit get_local_pda(int cpu)
 {
 	struct x8664_pda *oldpda, *newpda;
 	unsigned long size = sizeof(struct x8664_pda);
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -24,6 +24,8 @@
 
 extern void (*mtrr_hook)(void);
 extern void zap_low_mappings(void);
+
+extern int __cpuinit get_local_pda(int cpu);
 
 extern int smp_num_siblings;
 extern unsigned int num_processors;




* [PATCH 08 of 55] xen: print backtrace on multicall failure
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (6 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 07 of 55] x86_64: unstatic get_local_pda Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 09 of 55] xen-netfront: fix xennet_release_tx_bufs() Jeremy Fitzhardinge
                   ` (47 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Print a backtrace if a multicall fails, to help with debugging.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/multicalls.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -76,6 +76,7 @@
 		if (ret) {
 			printk(KERN_ERR "%d multicall(s) failed: cpu %d\n",
 			       ret, smp_processor_id());
+			dump_stack();
 			for (i = 0; i < b->mcidx; i++) {
 				printk("  call %2d/%d: op=%lu arg=[%lx] result=%ld\n",
 				       i+1, b->mcidx,




* [PATCH 09 of 55] xen-netfront: fix xennet_release_tx_bufs()
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (7 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 08 of 55] xen: print backtrace on multicall failure Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 10 of 55] xen: add xen_arch_resume()/xen_timer_resume hook for ia64 support Jeremy Fitzhardinge
                   ` (46 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

From: Isaku Yamahata <yamahata@valinux.co.jp>

After restoring an ia64 xen domain, the kernel panics as follows.
This patch fixes it.

union skb_entry assumes sizeof(list->skb) (a pointer) equals
sizeof(list->link) (an unsigned).  That isn't true on ia64, so make
the link field an unsigned long and introduce two accessors.
And introduced two accesor.

kernel unaligned access to 0xe0000000000000bd, ip=0xa0000001004c2ca0
xenwatch[14]: error during unaligned kernel access
 -1 [1]
Modules linked in:

Pid: 14, CPU 0, comm:             xenwatch
psr : 0000101008422010 ifs : 8000000000000307 ip  : [<a0000001004c2ca0>]    Not tainted (2.6.26-rc4xen-ia64-dirty)
ip is at dev_kfree_skb_irq+0x20/0x1a0
unat: 0000000000000000 pfs : 400000000000040b rsc : 0000000000000007
rnat: 0000000000000000 bsps: 0000000000000000 pr  : 000000000000a941
ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c8a70433f
csd : 0000000000000000 ssd : 0000000000000000
b0  : a0000001003efb70 b6  : a000000100070e40 b7  : a000000100070e40
f6  : 1003e000000fcb75352b1 f7  : 1003e000000000014ff97
f8  : 1003e00fcb74fc3454d80 f9  : 1003e0000000080000000
f10 : 1003e0000000000001431 f11 : 1003e0000000000989680
r1  : a000000100bfcf80 r2  : e0000000000000bd r3  : 000000000000308c
r8  : 0000000000000000 r9  : e00000000fc31310 r10 : a000000100a13b28
r11 : 0000000000000000 r12 : e00000000fd0fdf0 r13 : e00000000fd08000
r14 : 0000000000000000 r15 : e00000000fcc8000 r16 : 0000000000000009
r17 : e000010000104000 r18 : e000010000104000 r19 : a000000100a13b40
r20 : a0000001009c23f0 r21 : a0000001009fd4d0 r22 : 0000000000004000
r23 : 0000000000000000 r24 : fffffffffff04c10 r25 : 0000000000000002
r26 : 0000000000000000 r27 : 0000000000000000 r28 : e00000000fd08bd4
r29 : a0000001007570b8 r30 : a0000001009e5500 r31 : a0000001009e54a0

Call Trace:
 [<a000000100026000>] show_stack+0x40/0xa0
                                sp=e00000000fd0f670 bsp=e00000000fd08f68
 [<a000000100026a60>] show_regs+0x9a0/0x9e0
                                sp=e00000000fd0f840 bsp=e00000000fd08f10
 [<a000000100037680>] die+0x260/0x3a0
                                sp=e00000000fd0f840 bsp=e00000000fd08ec8
 [<a000000100037810>] die_if_kernel+0x50/0x80
                                sp=e00000000fd0f840 bsp=e00000000fd08e98
 [<a00000010003eb40>] ia64_handle_unaligned+0x2ea0/0x2fc0
                                sp=e00000000fd0f840 bsp=e00000000fd08df0
 [<a00000010001ca30>] ia64_prepare_handle_unaligned+0x30/0x60
                                sp=e00000000fd0fa10 bsp=e00000000fd08df0
 [<a00000010005d100>] paravirt_leave_kernel+0x0/0x40
                                sp=e00000000fd0fc20 bsp=e00000000fd08df0
 [<a0000001004c2ca0>] dev_kfree_skb_irq+0x20/0x1a0
                                sp=e00000000fd0fdf0 bsp=e00000000fd08db8
 [<a0000001003efb70>] xennet_release_tx_bufs+0xd0/0x120
                                sp=e00000000fd0fdf0 bsp=e00000000fd08d78
 [<a0000001003f14c0>] backend_changed+0xc40/0xf80
                                sp=e00000000fd0fdf0 bsp=e00000000fd08d08
 [<a00000010034bd50>] otherend_changed+0x190/0x1c0
                                sp=e00000000fd0fe00 bsp=e00000000fd08cc8
 [<a000000100349530>] xenwatch_thread+0x310/0x3c0
                                sp=e00000000fd0fe00 bsp=e00000000fd08ca0
 [<a0000001000cb040>] kthread+0xe0/0x160
                                sp=e00000000fd0fe30 bsp=e00000000fd08c68
 [<a000000100024450>] kernel_thread_helper+0x30/0x60
                                sp=e00000000fd0fe30 bsp=e00000000fd08c40
 [<a00000010001a8a0>] start_kernel_thread+0x20/0x40
                                sp=e00000000fd0fe30 bsp=e00000000fd08c40
Kernel panic - not syncing: Aiee, killing interrupt handler!
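
To restate the invariant the fix relies on (a summary of the hunks
below, not new code): tx_skbs[] slots hold either a live skb pointer
or a small freelist index, distinguished purely by value, so the two
union members must be the same size:

union skb_entry {
	struct sk_buff *skb;	/* a real skb: value >= PAGE_OFFSET */
	unsigned long link;	/* freelist index: value < PAGE_OFFSET */
};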

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
 drivers/net/xen-netfront.c |   20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -92,7 +92,7 @@
 	 */
 	union skb_entry {
 		struct sk_buff *skb;
-		unsigned link;
+		unsigned long link;
 	} tx_skbs[NET_TX_RING_SIZE];
 	grant_ref_t gref_tx_head;
 	grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
@@ -125,6 +125,17 @@
 	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
 };
 
+static void skb_entry_set_link(union skb_entry *list, unsigned short id)
+{
+	list->link = id;
+}
+
+static int skb_entry_is_link(const union skb_entry *list)
+{
+	BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
+	return ((unsigned long)list->skb < PAGE_OFFSET);
+}
+
 /*
  * Access macros for acquiring freeing slots in tx_skbs[].
  */
@@ -132,7 +143,7 @@
 static void add_id_to_freelist(unsigned *head, union skb_entry *list,
 			       unsigned short id)
 {
-	list[id].link = *head;
+	skb_entry_set_link(&list[id], *head);
 	*head = id;
 }
 
@@ -993,7 +1004,7 @@
 
 	for (i = 0; i < NET_TX_RING_SIZE; i++) {
 		/* Skip over entries which are actually freelist references */
-		if ((unsigned long)np->tx_skbs[i].skb < PAGE_OFFSET)
+		if (skb_entry_is_link(&np->tx_skbs[i]))
 			continue;
 
 		skb = np->tx_skbs[i].skb;
@@ -1123,7 +1134,7 @@
 	/* Initialise tx_skbs as a free chain containing every entry. */
 	np->tx_skb_freelist = 0;
 	for (i = 0; i < NET_TX_RING_SIZE; i++) {
-		np->tx_skbs[i].link = i+1;
+		skb_entry_set_link(&np->tx_skbs[i], i+1);
 		np->grant_tx_ref[i] = GRANT_INVALID_REF;
 	}
 




* [PATCH 10 of 55] xen: add xen_arch_resume()/xen_timer_resume hook for ia64 support
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (8 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 09 of 55] xen-netfront: fix xennet_release_tx_bufs() Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 11 of 55] xen: define set_pte from the outset Jeremy Fitzhardinge
                   ` (45 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

From: Isaku Yamahata <yamahata@valinux.co.jp>

Add a xen_timer_resume() hook; timer resume must happen after the
event channel is resumed.

Add a xen_arch_resume() hook, called once IPIs become usable after
resume.  On ia64, some cpu-specific resources must be reinitialized
after resume, and they can't be set up by another cpu.  But the
existing hooks run only once, on a single cpu, so an IPI has to be
used; and during stop_machine_run() IPIs can't be used because
interrupts are masked.  So add another hook that runs after
stop_machine_run().

Another approach would be a resume hook run by device_resume(), but
device_resume() may also be executed on the suspend error recovery
path, so it would be necessary to determine whether it is running on
the real resume path or on the error recovery path.
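
A hypothetical sketch (not part of this patch) of how the ia64 side
might use the new hook; resume_local_state() is invented, and the
on_each_cpu() argument list should be checked against the kernel
version in use:

static void resume_local_state(void *unused)
{
	/* reinitialize cpu-specific state only this cpu can set */
}

void xen_arch_resume(void)
{
	/* runs after stop_machine_run(), so IPIs work again */
	on_each_cpu(resume_local_state, NULL, 1);
}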

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
 arch/x86/xen/suspend.c |    5 ++++-
 arch/x86/xen/xen-ops.h |    1 -
 drivers/xen/manage.c   |    6 ++++--
 include/xen/xen-ops.h  |    4 +++-
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -38,8 +38,11 @@
 		xen_cpu_initialized_map = cpu_online_map;
 #endif
 		xen_vcpu_restore();
-		xen_timer_resume();
 	}
 
 }
 
+void xen_arch_resume(void)
+{
+	/* nothing */
+}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -37,7 +37,6 @@
 unsigned long xen_get_wallclock(void);
 int xen_set_wallclock(unsigned long time);
 unsigned long long xen_sched_clock(void);
-void xen_timer_resume(void);
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
 
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -68,6 +68,7 @@
 	if (!*cancelled) {
 		xen_irq_resume();
 		xen_console_resume();
+		xen_timer_resume();
 	}
 
 	return 0;
@@ -107,9 +108,10 @@
 		goto out;
 	}
 
-	if (!cancelled)
+	if (!cancelled) {
+		xen_arch_resume();
 		xenbus_resume();
-	else
+	} else
 		xenbus_suspend_cancel();
 
 	device_resume();
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -11,4 +11,7 @@
 void xen_mm_pin_all(void);
 void xen_mm_unpin_all(void);
 
+void xen_timer_resume(void);
+void xen_arch_resume(void);
+
 #endif /* INCLUDE_XEN_OPS_H */




* [PATCH 11 of 55] xen: define set_pte from the outset
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (9 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 10 of 55] xen: add xen_arch_resume()/xen_timer_resume hook for ia64 support Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 12 of 55] xen64: define asm/xen/interface for 64-bit Jeremy Fitzhardinge
                   ` (44 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

We need set_pte to work from a relatively early point, so enable it
from the start.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |    5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -845,9 +845,6 @@
 	pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
 	int i;
 
-	/* special set_pte for pagetable initialization */
-	pv_mmu_ops.set_pte = xen_set_pte_init;
-
 	init_mm.pgd = base;
 	/*
 	 * copy top-level of Xen-supplied pagetable into place.  This
@@ -1174,7 +1171,7 @@
 	.kmap_atomic_pte = xen_kmap_atomic_pte,
 #endif
 
-	.set_pte = NULL,	/* see xen_pagetable_setup_* */
+	.set_pte = xen_set_pte_init,
 	.set_pte_at = xen_set_pte_at,
 	.set_pmd = xen_set_pmd_hyper,
 




* [PATCH 12 of 55] xen64: define asm/xen/interface for 64-bit
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (10 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 11 of 55] xen: define set_pte from the outset Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 13 of 55] xen: make ELF notes work for 32 and 64 bit Jeremy Fitzhardinge
                   ` (43 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Copy 64-bit definitions of various interface structures into place.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/mmu.h                 |   12 --
 include/asm-x86/xen/interface.h    |  139 +++++++++++--------------------
 include/asm-x86/xen/interface_32.h |   97 +++++++++++++++++++++
 include/asm-x86/xen/interface_64.h |  159 ++++++++++++++++++++++++++++++++++++
 include/xen/interface/callback.h   |    6 -
 5 files changed, 308 insertions(+), 105 deletions(-)

diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -9,18 +9,6 @@
 	PT_PMD,
 	PT_PTE
 };
-
-/*
- * Page-directory addresses above 4GB do not fit into architectural %cr3.
- * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
- * must use the following accessor macros to pack/unpack valid MFNs.
- *
- * Note that Xen is using the fact that the pagetable base is always
- * page-aligned, and putting the 12 MSB of the address into the 12 LSB
- * of cr3.
- */
-#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
-#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
 
 
 void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
diff --git a/include/asm-x86/xen/interface.h b/include/asm-x86/xen/interface.h
--- a/include/asm-x86/xen/interface.h
+++ b/include/asm-x86/xen/interface.h
@@ -1,13 +1,13 @@
 /******************************************************************************
  * arch-x86_32.h
  *
- * Guest OS interface to x86 32-bit Xen.
+ * Guest OS interface to x86 Xen.
  *
  * Copyright (c) 2004, K A Fraser
  */
 
-#ifndef __XEN_PUBLIC_ARCH_X86_32_H__
-#define __XEN_PUBLIC_ARCH_X86_32_H__
+#ifndef __ASM_X86_XEN_INTERFACE_H
+#define __ASM_X86_XEN_INTERFACE_H
 
 #ifdef __XEN__
 #define __DEFINE_GUEST_HANDLE(name, type) \
@@ -57,6 +57,17 @@
 DEFINE_GUEST_HANDLE(void);
 #endif
 
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#endif
+
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+
+/* Maximum number of virtual CPUs in multi-processor guests. */
+#define MAX_VIRT_CPUS 32
+
 /*
  * SEGMENT DESCRIPTOR TABLES
  */
@@ -71,58 +82,21 @@
 #define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
 
 /*
- * These flat segments are in the Xen-private section of every GDT. Since these
- * are also present in the initial GDT, many OSes will be able to avoid
- * installing their own GDT.
- */
-#define FLAT_RING1_CS 0xe019    /* GDT index 259 */
-#define FLAT_RING1_DS 0xe021    /* GDT index 260 */
-#define FLAT_RING1_SS 0xe021    /* GDT index 260 */
-#define FLAT_RING3_CS 0xe02b    /* GDT index 261 */
-#define FLAT_RING3_DS 0xe033    /* GDT index 262 */
-#define FLAT_RING3_SS 0xe033    /* GDT index 262 */
-
-#define FLAT_KERNEL_CS FLAT_RING1_CS
-#define FLAT_KERNEL_DS FLAT_RING1_DS
-#define FLAT_KERNEL_SS FLAT_RING1_SS
-#define FLAT_USER_CS    FLAT_RING3_CS
-#define FLAT_USER_DS    FLAT_RING3_DS
-#define FLAT_USER_SS    FLAT_RING3_SS
-
-/* And the trap vector is... */
-#define TRAP_INSTR "int $0x82"
-
-/*
- * Virtual addresses beyond this are not modifiable by guest OSes. The
- * machine->physical mapping table starts at this address, read-only.
- */
-#ifdef CONFIG_X86_PAE
-#define __HYPERVISOR_VIRT_START 0xF5800000
-#else
-#define __HYPERVISOR_VIRT_START 0xFC000000
-#endif
-
-#ifndef HYPERVISOR_VIRT_START
-#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
-#endif
-
-#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
-#endif
-
-/* Maximum number of virtual CPUs in multi-processor guests. */
-#define MAX_VIRT_CPUS 32
-
-#ifndef __ASSEMBLY__
-
-/*
  * Send an array of these to HYPERVISOR_set_trap_table()
+ * The privilege level specifies which modes may enter a trap via a software
+ * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
+ * privilege levels as follows:
+ *  Level == 0: No one may enter
+ *  Level == 1: Kernel may enter
+ *  Level == 2: Kernel may enter
+ *  Level == 3: Everyone may enter
  */
 #define TI_GET_DPL(_ti)		((_ti)->flags & 3)
 #define TI_GET_IF(_ti)		((_ti)->flags & 4)
 #define TI_SET_DPL(_ti, _dpl)	((_ti)->flags |= (_dpl))
 #define TI_SET_IF(_ti, _if)	((_ti)->flags |= ((!!(_if))<<2))
 
+#ifndef __ASSEMBLY__
 struct trap_info {
     uint8_t       vector;  /* exception vector                              */
     uint8_t       flags;   /* 0-3: privilege level; 4: clear event enable?  */
@@ -131,32 +105,21 @@
 };
 DEFINE_GUEST_HANDLE_STRUCT(trap_info);
 
-struct cpu_user_regs {
-    uint32_t ebx;
-    uint32_t ecx;
-    uint32_t edx;
-    uint32_t esi;
-    uint32_t edi;
-    uint32_t ebp;
-    uint32_t eax;
-    uint16_t error_code;    /* private */
-    uint16_t entry_vector;  /* private */
-    uint32_t eip;
-    uint16_t cs;
-    uint8_t  saved_upcall_mask;
-    uint8_t  _pad0;
-    uint32_t eflags;        /* eflags.IF == !saved_upcall_mask */
-    uint32_t esp;
-    uint16_t ss, _pad1;
-    uint16_t es, _pad2;
-    uint16_t ds, _pad3;
-    uint16_t fs, _pad4;
-    uint16_t gs, _pad5;
+struct arch_shared_info {
+    unsigned long max_pfn;                  /* max pfn that appears in table */
+    /* Frame containing list of mfns containing list of mfns containing p2m. */
+    unsigned long pfn_to_mfn_frame_list_list;
+    unsigned long nmi_reason;
 };
-DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs);
+#endif	/* !__ASSEMBLY__ */
 
-typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+#ifdef CONFIG_X86_32
+#include "interface_32.h"
+#else
+#include "interface_64.h"
+#endif
 
+#ifndef __ASSEMBLY__
 /*
  * The following is all CPU context. Note that the fpu_ctxt block is filled
  * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
@@ -173,33 +136,29 @@
     unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
     unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
     unsigned long kernel_ss, kernel_sp;     /* Virtual TSS (only SS1/SP1)   */
+    /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
     unsigned long ctrlreg[8];               /* CR0-CR7 (control registers)  */
     unsigned long debugreg[8];              /* DB0-DB7 (debug registers)    */
+#ifdef __i386__
     unsigned long event_callback_cs;        /* CS:EIP of event callback     */
     unsigned long event_callback_eip;
     unsigned long failsafe_callback_cs;     /* CS:EIP of failsafe callback  */
     unsigned long failsafe_callback_eip;
+#else
+    unsigned long event_callback_eip;
+    unsigned long failsafe_callback_eip;
+    unsigned long syscall_callback_eip;
+#endif
     unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
+#ifdef __x86_64__
+    /* Segment base addresses. */
+    uint64_t      fs_base;
+    uint64_t      gs_base_kernel;
+    uint64_t      gs_base_user;
+#endif
 };
 DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
-
-struct arch_shared_info {
-    unsigned long max_pfn;                  /* max pfn that appears in table */
-    /* Frame containing list of mfns containing list of mfns containing p2m. */
-    unsigned long pfn_to_mfn_frame_list_list;
-    unsigned long nmi_reason;
-};
-
-struct arch_vcpu_info {
-    unsigned long cr2;
-    unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */
-};
-
-struct xen_callback {
-	unsigned long cs;
-	unsigned long eip;
-};
-#endif /* !__ASSEMBLY__ */
+#endif	/* !__ASSEMBLY__ */
 
 /*
  * Prefix forces emulation of some non-trapping instructions.
@@ -213,4 +172,4 @@
 #define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid"
 #endif
 
-#endif
+#endif	/* __ASM_X86_XEN_INTERFACE_H */
diff --git a/include/asm-x86/xen/interface_32.h b/include/asm-x86/xen/interface_32.h
new file mode 100644
--- /dev/null
+++ b/include/asm-x86/xen/interface_32.h
@@ -0,0 +1,97 @@
+/******************************************************************************
+ * arch-x86_32.h
+ *
+ * Guest OS interface to x86 32-bit Xen.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __ASM_X86_XEN_INTERFACE_32_H
+#define __ASM_X86_XEN_INTERFACE_32_H
+
+
+/*
+ * These flat segments are in the Xen-private section of every GDT. Since these
+ * are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+#define FLAT_RING1_CS 0xe019    /* GDT index 259 */
+#define FLAT_RING1_DS 0xe021    /* GDT index 260 */
+#define FLAT_RING1_SS 0xe021    /* GDT index 260 */
+#define FLAT_RING3_CS 0xe02b    /* GDT index 261 */
+#define FLAT_RING3_DS 0xe033    /* GDT index 262 */
+#define FLAT_RING3_SS 0xe033    /* GDT index 262 */
+
+#define FLAT_KERNEL_CS FLAT_RING1_CS
+#define FLAT_KERNEL_DS FLAT_RING1_DS
+#define FLAT_KERNEL_SS FLAT_RING1_SS
+#define FLAT_USER_CS    FLAT_RING3_CS
+#define FLAT_USER_DS    FLAT_RING3_DS
+#define FLAT_USER_SS    FLAT_RING3_SS
+
+/* And the trap vector is... */
+#define TRAP_INSTR "int $0x82"
+
+/*
+ * Virtual addresses beyond this are not modifiable by guest OSes. The
+ * machine->physical mapping table starts at this address, read-only.
+ */
+#define __HYPERVISOR_VIRT_START 0xF5800000
+
+#ifndef __ASSEMBLY__
+
+struct cpu_user_regs {
+    uint32_t ebx;
+    uint32_t ecx;
+    uint32_t edx;
+    uint32_t esi;
+    uint32_t edi;
+    uint32_t ebp;
+    uint32_t eax;
+    uint16_t error_code;    /* private */
+    uint16_t entry_vector;  /* private */
+    uint32_t eip;
+    uint16_t cs;
+    uint8_t  saved_upcall_mask;
+    uint8_t  _pad0;
+    uint32_t eflags;        /* eflags.IF == !saved_upcall_mask */
+    uint32_t esp;
+    uint16_t ss, _pad1;
+    uint16_t es, _pad2;
+    uint16_t ds, _pad3;
+    uint16_t fs, _pad4;
+    uint16_t gs, _pad5;
+};
+DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs);
+
+typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+struct arch_vcpu_info {
+    unsigned long cr2;
+    unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */
+};
+
+struct xen_callback {
+	unsigned long cs;
+	unsigned long eip;
+};
+typedef struct xen_callback xen_callback_t;
+
+#define XEN_CALLBACK(__cs, __eip)				\
+	((struct xen_callback){ .cs = (__cs), .eip = (unsigned long)(__eip) })
+#endif /* !__ASSEMBLY__ */
+
+
+/*
+ * Page-directory addresses above 4GB do not fit into architectural %cr3.
+ * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
+ * must use the following accessor macros to pack/unpack valid MFNs.
+ *
+ * Note that Xen is using the fact that the pagetable base is always
+ * page-aligned, and putting the 12 MSB of the address into the 12 LSB
+ * of cr3.
+ */
+#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
+#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
+
+#endif	/* __ASM_X86_XEN_INTERFACE_32_H */
diff --git a/include/asm-x86/xen/interface_64.h b/include/asm-x86/xen/interface_64.h
new file mode 100644
--- /dev/null
+++ b/include/asm-x86/xen/interface_64.h
@@ -0,0 +1,159 @@
+#ifndef __ASM_X86_XEN_INTERFACE_64_H
+#define __ASM_X86_XEN_INTERFACE_64_H
+
+/*
+ * 64-bit segment selectors
+ * These flat segments are in the Xen-private section of every GDT. Since these
+ * are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+
+#define FLAT_RING3_CS32 0xe023  /* GDT index 260 */
+#define FLAT_RING3_CS64 0xe033  /* GDT index 261 */
+#define FLAT_RING3_DS32 0xe02b  /* GDT index 262 */
+#define FLAT_RING3_DS64 0x0000  /* NULL selector */
+#define FLAT_RING3_SS32 0xe02b  /* GDT index 262 */
+#define FLAT_RING3_SS64 0xe02b  /* GDT index 262 */
+
+#define FLAT_KERNEL_DS64 FLAT_RING3_DS64
+#define FLAT_KERNEL_DS32 FLAT_RING3_DS32
+#define FLAT_KERNEL_DS   FLAT_KERNEL_DS64
+#define FLAT_KERNEL_CS64 FLAT_RING3_CS64
+#define FLAT_KERNEL_CS32 FLAT_RING3_CS32
+#define FLAT_KERNEL_CS   FLAT_KERNEL_CS64
+#define FLAT_KERNEL_SS64 FLAT_RING3_SS64
+#define FLAT_KERNEL_SS32 FLAT_RING3_SS32
+#define FLAT_KERNEL_SS   FLAT_KERNEL_SS64
+
+#define FLAT_USER_DS64 FLAT_RING3_DS64
+#define FLAT_USER_DS32 FLAT_RING3_DS32
+#define FLAT_USER_DS   FLAT_USER_DS64
+#define FLAT_USER_CS64 FLAT_RING3_CS64
+#define FLAT_USER_CS32 FLAT_RING3_CS32
+#define FLAT_USER_CS   FLAT_USER_CS64
+#define FLAT_USER_SS64 FLAT_RING3_SS64
+#define FLAT_USER_SS32 FLAT_RING3_SS32
+#define FLAT_USER_SS   FLAT_USER_SS64
+
+#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
+#define __HYPERVISOR_VIRT_END   0xFFFF880000000000
+#define __MACH2PHYS_VIRT_START  0xFFFF800000000000
+#define __MACH2PHYS_VIRT_END    0xFFFF804000000000
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#define HYPERVISOR_VIRT_END   mk_unsigned_long(__HYPERVISOR_VIRT_END)
+#endif
+
+#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+
+/*
+ * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
+ *  @which == SEGBASE_*  ;  @base == 64-bit base address
+ * Returns 0 on success.
+ */
+#define SEGBASE_FS          0
+#define SEGBASE_GS_USER     1
+#define SEGBASE_GS_KERNEL   2
+#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
+
+/*
+ * int HYPERVISOR_iret(void)
+ * All arguments are on the kernel stack, in the following format.
+ * Never returns if successful. Current kernel context is lost.
+ * The saved CS is mapped as follows:
+ *   RING0 -> RING3 kernel mode.
+ *   RING1 -> RING3 kernel mode.
+ *   RING2 -> RING3 kernel mode.
+ *   RING3 -> RING3 user mode.
+ * However RING0 indicates that the guest kernel should return to itself
+ * directly with
+ *      orb   $3,1*8(%rsp)
+ *      iretq
+ * If flags contains VGCF_in_syscall:
+ *   Restore RAX, RIP, RFLAGS, RSP.
+ *   Discard R11, RCX, CS, SS.
+ * Otherwise:
+ *   Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
+ * All other registers are saved on hypercall entry and restored to user.
+ */
+/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
+#define _VGCF_in_syscall 8
+#define VGCF_in_syscall  (1<<_VGCF_in_syscall)
+#define VGCF_IN_SYSCALL  VGCF_in_syscall
+
+#ifndef __ASSEMBLY__
+
+struct iret_context {
+    /* Top of stack (%rsp at point of hypercall). */
+    uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
+    /* Bottom of iret stack frame. */
+};
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
+#define __DECL_REG(name) union { \
+    uint64_t r ## name, e ## name; \
+    uint32_t _e ## name; \
+}
+#else
+/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */
+#define __DECL_REG(name) uint64_t r ## name
+#endif
+
+struct cpu_user_regs {
+    uint64_t r15;
+    uint64_t r14;
+    uint64_t r13;
+    uint64_t r12;
+    __DECL_REG(bp);
+    __DECL_REG(bx);
+    uint64_t r11;
+    uint64_t r10;
+    uint64_t r9;
+    uint64_t r8;
+    __DECL_REG(ax);
+    __DECL_REG(cx);
+    __DECL_REG(dx);
+    __DECL_REG(si);
+    __DECL_REG(di);
+    uint32_t error_code;    /* private */
+    uint32_t entry_vector;  /* private */
+    __DECL_REG(ip);
+    uint16_t cs, _pad0[1];
+    uint8_t  saved_upcall_mask;
+    uint8_t  _pad1[3];
+    __DECL_REG(flags);      /* rflags.IF == !saved_upcall_mask */
+    __DECL_REG(sp);
+    uint16_t ss, _pad2[3];
+    uint16_t es, _pad3[3];
+    uint16_t ds, _pad4[3];
+    uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base.     */
+    uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */
+};
+DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs);
+
+#undef __DECL_REG
+
+#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12)
+#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12)
+
+struct arch_vcpu_info {
+    unsigned long cr2;
+    unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+
+typedef unsigned long xen_callback_t;
+
+#define XEN_CALLBACK(__cs, __rip)				\
+	((unsigned long)(__rip))
+
+#endif /* !__ASSEMBLY__ */
+
+
+#endif	/* __ASM_X86_XEN_INTERFACE_64_H */
diff --git a/include/xen/interface/callback.h b/include/xen/interface/callback.h
--- a/include/xen/interface/callback.h
+++ b/include/xen/interface/callback.h
@@ -82,9 +82,9 @@
  */
 #define CALLBACKOP_register                0
 struct callback_register {
-    uint16_t type;
-    uint16_t flags;
-    struct xen_callback address;
+	uint16_t type;
+	uint16_t flags;
+	xen_callback_t address;
 };
 
 /*




* [PATCH 13 of 55] xen: make ELF notes work for 32 and 64 bit
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (11 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 12 of 55] xen64: define asm/xen/interface for 64-bit Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 14 of 55] xen: fix 64-bit hypercall variants Jeremy Fitzhardinge
                   ` (42 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/xen-head.S |   13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -5,7 +5,10 @@
 
 #include <linux/elfnote.h>
 #include <linux/init.h>
+
 #include <asm/boot.h>
+#include <asm/asm.h>
+
 #include <xen/interface/elfnote.h>
 #include <asm/xen/interface.h>
 
@@ -21,21 +24,21 @@
 .pushsection .text
 	.align PAGE_SIZE_asm
 ENTRY(hypercall_page)
-	.skip 0x1000
+	.skip PAGE_SIZE_asm
 .popsection
 
 	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")
 	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz "2.6")
 	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz "xen-3.0")
-	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .long  __PAGE_OFFSET)
-	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .long  startup_xen)
-	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long  hypercall_page)
+	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      _ASM_PTR __PAGE_OFFSET)
+	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          _ASM_PTR startup_xen)
+	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz "!writable_page_tables|pae_pgdir_above_4gb")
 	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
 	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
 	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
 		.quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
 	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
-	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   .long __HYPERVISOR_VIRT_START)
+	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   _ASM_PTR __HYPERVISOR_VIRT_START)
 
 #endif /*CONFIG_XEN */
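
For reference, _ASM_PTR comes from <asm/asm.h> and selects the
pointer-sized directive; roughly (treat the exact definition as an
assumption):

#ifdef CONFIG_X86_32
# define _ASM_PTR	.long
#else
# define _ASM_PTR	.quad
#endif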




* [PATCH 14 of 55] xen: fix 64-bit hypercall variants
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (12 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 13 of 55] xen: make ELF notes work for 32 and 64 bit Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 15 of 55] xen64: fix calls into hypercall page Jeremy Fitzhardinge
                   ` (41 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

64-bit guests can pass 64-bit quantities in a single argument,
so fix up the hypercalls.
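
A minimal standalone illustration of the sizeof-based dispatch used
below (pack_pte_args() and pte_t_example are invented for this sketch;
the point is that the comparison is a compile-time constant, so gcc
discards the dead branch and the 32-bit PAE and 64-bit cases share one
source path):

typedef struct { unsigned long long pte; } pte_t_example;

static inline void pack_pte_args(unsigned long *args, pte_t_example v,
				 unsigned long flags)
{
	if (sizeof(v.pte) == sizeof(long)) {	/* 64-bit: pte fits one arg */
		args[0] = v.pte;
		args[1] = flags;
	} else {				/* 32-bit PAE: split high/low */
		args[0] = v.pte;
		args[1] = v.pte >> 32;
		args[2] = flags;
	}
}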

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 include/asm-x86/xen/hypercall.h |   60 ++++++++++++++++++++-------------------
 1 file changed, 31 insertions(+), 29 deletions(-)

diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -223,12 +223,12 @@
 HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val,
 			     unsigned long flags)
 {
-	unsigned long pte_hi = 0;
-#ifdef CONFIG_X86_PAE
-	pte_hi = new_val.pte_high;
-#endif
-	return _hypercall4(int, update_va_mapping, va,
-			   new_val.pte_low, pte_hi, flags);
+	if (sizeof(new_val) == sizeof(long))
+		return _hypercall3(int, update_va_mapping, va,
+				   new_val.pte, flags);
+	else
+		return _hypercall4(int, update_va_mapping, va,
+				   new_val.pte, new_val.pte >> 32, flags);
 }
 
 static inline int
@@ -281,12 +281,13 @@
 HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, pte_t new_val,
 					 unsigned long flags, domid_t domid)
 {
-	unsigned long pte_hi = 0;
-#ifdef CONFIG_X86_PAE
-	pte_hi = new_val.pte_high;
-#endif
-	return _hypercall5(int, update_va_mapping_otherdomain, va,
-			   new_val.pte_low, pte_hi, flags, domid);
+	if (sizeof(new_val) == sizeof(long))
+		return _hypercall4(int, update_va_mapping_otherdomain, va,
+				   new_val.pte, flags, domid);
+	else
+		return _hypercall5(int, update_va_mapping_otherdomain, va,
+				   new_val.pte, new_val.pte >> 32,
+				   flags, domid);
 }
 
 static inline int
@@ -327,14 +328,14 @@
 {
 	mcl->op = __HYPERVISOR_update_va_mapping;
 	mcl->args[0] = va;
-#ifdef CONFIG_X86_PAE
-	mcl->args[1] = new_val.pte_low;
-	mcl->args[2] = new_val.pte_high;
-#else
-	mcl->args[1] = new_val.pte_low;
-	mcl->args[2] = 0;
-#endif
-	mcl->args[3] = flags;
+	if (sizeof(new_val) == sizeof(long)) {
+		mcl->args[1] = new_val.pte;
+		mcl->args[2] = flags;
+	} else {
+		mcl->args[1] = new_val.pte;
+		mcl->args[2] = new_val.pte >> 32;
+		mcl->args[3] = flags;
+	}
 }
 
 static inline void
@@ -354,15 +355,16 @@
 {
 	mcl->op = __HYPERVISOR_update_va_mapping_otherdomain;
 	mcl->args[0] = va;
-#ifdef CONFIG_X86_PAE
-	mcl->args[1] = new_val.pte_low;
-	mcl->args[2] = new_val.pte_high;
-#else
-	mcl->args[1] = new_val.pte_low;
-	mcl->args[2] = 0;
-#endif
-	mcl->args[3] = flags;
-	mcl->args[4] = domid;
+	if (sizeof(new_val) == sizeof(long)) {
+		mcl->args[1] = new_val.pte;
+		mcl->args[2] = flags;
+		mcl->args[3] = domid;
+	} else {
+		mcl->args[1] = new_val.pte;
+		mcl->args[2] = new_val.pte >> 32;
+		mcl->args[3] = flags;
+		mcl->args[4] = domid;
+	}
 }
 
 static inline void




* [PATCH 15 of 55] xen64: fix calls into hypercall page
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (13 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 14 of 55] xen: fix 64-bit hypercall variants Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 16 of 55] xen64: add extra pv_mmu_ops Jeremy Fitzhardinge
                   ` (40 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

The 64-bit calling convention for hypercalls uses different registers
from 32-bit.  Annoyingly, gcc's asm syntax doesn't have a way to
specify one of the extra numbered registers (r8-r15) in a constraint,
so we must use explicitly placed register variables.  Given that we
have to do it for some args, we may as well do it for all.

Also fix the syntax gcc generates for the call instruction itself.
We need a plain direct call, but the asm expansion which works on
32-bit generates a rip-relative addressing mode on 64-bit, which is
treated as an indirect call.  The alternative is to pass the
hypercall page offset into the asm, and have it added to the
hypercall page start address to generate the call.
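
As a minimal sketch of the register-variable technique (64-bit only,
two arguments; xen_sched_op_sketch() is a hypothetical name, and it
assumes the hypercall_page and __HYPERVISOR_sched_op definitions
already visible in this header):

/* Sketch only: pin args to the 64-bit hypercall registers, since
   asm constraints can't name r8-r15, and force a plain direct call
   by feeding the page offset in as an "i" constant. */
static inline long xen_sched_op_sketch(int cmd, void *arg)
{
	register long __res  asm("rax");
	register long __arg1 asm("rdi") = cmd;
	register long __arg2 asm("rsi") = (long)arg;

	asm volatile("call hypercall_page+%c[off]"
		     : "=r" (__res), "+r" (__arg1), "+r" (__arg2)
		     : [off] "i" (__HYPERVISOR_sched_op *
				  sizeof(hypercall_page[0]))
		     : "memory", "rdx", "r10", "r8"); /* unused arg regs */
	return __res;
}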

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 include/asm-x86/xen/hypercall.h |  170 +++++++++++++++++++++++++++------------
 1 file changed, 122 insertions(+), 48 deletions(-)

diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -40,83 +40,157 @@
 #include <xen/interface/sched.h>
 #include <xen/interface/physdev.h>
 
+/*
+ * The hypercall asms have to meet several constraints:
+ * - Work on 32- and 64-bit.
+ *    The two architectures put their arguments in different sets of
+ *    registers.
+ *
+ * - Work around asm syntax quirks
+ *    It isn't possible to specify one of the rNN registers in a
+ *    constraint, so we use explicit register variables to get the
+ *    args into the right place.
+ *
+ * - Mark all registers as potentially clobbered
+ *    Even unused parameters can be clobbered by the hypervisor, so we
+ *    need to make sure gcc knows it.
+ *
+ * - Avoid compiler bugs.
+ *    This is the tricky part.  Because x86_32 has such a constrained
+ *    register set, gcc versions below 4.3 have trouble generating
+ *    code when all the arg registers and memory are trashed by the
+ *    asm.  There are syntactically simpler ways of achieving the
+ *    semantics below, but they cause the compiler to crash.
+ *
+ *    The only combination I found which works is:
+ *     - assign the __argX variables first
+ *     - list all actually used parameters as "+r" (__argX)
+ *     - clobber the rest
+ *
+ * The result certainly isn't pretty, and it really shows up cpp's
+ * weakness as a macro language.  Sorry.  (But let's just give thanks
+ * there aren't more than 5 arguments...)
+ */
+
 extern struct { char _entry[32]; } hypercall_page[];
+
+#define __HYPERCALL		"call hypercall_page+%c[offset]"
+#define __HYPERCALL_ENTRY(x)						\
+	[offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0]))
+
+#ifdef CONFIG_X86_32
+#define __HYPERCALL_RETREG	"eax"
+#define __HYPERCALL_ARG1REG	"ebx"
+#define __HYPERCALL_ARG2REG	"ecx"
+#define __HYPERCALL_ARG3REG	"edx"
+#define __HYPERCALL_ARG4REG	"esi"
+#define __HYPERCALL_ARG5REG	"edi"
+#else
+#define __HYPERCALL_RETREG	"rax"
+#define __HYPERCALL_ARG1REG	"rdi"
+#define __HYPERCALL_ARG2REG	"rsi"
+#define __HYPERCALL_ARG3REG	"rdx"
+#define __HYPERCALL_ARG4REG	"r10"
+#define __HYPERCALL_ARG5REG	"r8"
+#endif
+
+#define __HYPERCALL_DECLS						\
+	register unsigned long __res  asm(__HYPERCALL_RETREG);		\
+	register unsigned long __arg1 asm(__HYPERCALL_ARG1REG) = __arg1; \
+	register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
+	register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
+	register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
+	register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
+
+#define __HYPERCALL_0PARAM	"=r" (__res)
+#define __HYPERCALL_1PARAM	__HYPERCALL_0PARAM, "+r" (__arg1)
+#define __HYPERCALL_2PARAM	__HYPERCALL_1PARAM, "+r" (__arg2)
+#define __HYPERCALL_3PARAM	__HYPERCALL_2PARAM, "+r" (__arg3)
+#define __HYPERCALL_4PARAM	__HYPERCALL_3PARAM, "+r" (__arg4)
+#define __HYPERCALL_5PARAM	__HYPERCALL_4PARAM, "+r" (__arg5)
+
+#define __HYPERCALL_0ARG()
+#define __HYPERCALL_1ARG(a1)						\
+	__HYPERCALL_0ARG()		__arg1 = (unsigned long)(a1);
+#define __HYPERCALL_2ARG(a1,a2)						\
+	__HYPERCALL_1ARG(a1)		__arg2 = (unsigned long)(a2);
+#define __HYPERCALL_3ARG(a1,a2,a3)					\
+	__HYPERCALL_2ARG(a1,a2)		__arg3 = (unsigned long)(a3);
+#define __HYPERCALL_4ARG(a1,a2,a3,a4)					\
+	__HYPERCALL_3ARG(a1,a2,a3)	__arg4 = (unsigned long)(a4);
+#define __HYPERCALL_5ARG(a1,a2,a3,a4,a5)				\
+	__HYPERCALL_4ARG(a1,a2,a3,a4)	__arg5 = (unsigned long)(a5);
+
+#define __HYPERCALL_CLOBBER5	"memory"
+#define __HYPERCALL_CLOBBER4	__HYPERCALL_CLOBBER5, __HYPERCALL_ARG5REG
+#define __HYPERCALL_CLOBBER3	__HYPERCALL_CLOBBER4, __HYPERCALL_ARG4REG
+#define __HYPERCALL_CLOBBER2	__HYPERCALL_CLOBBER3, __HYPERCALL_ARG3REG
+#define __HYPERCALL_CLOBBER1	__HYPERCALL_CLOBBER2, __HYPERCALL_ARG2REG
+#define __HYPERCALL_CLOBBER0	__HYPERCALL_CLOBBER1, __HYPERCALL_ARG1REG
 
 #define _hypercall0(type, name)						\
 ({									\
-	long __res;							\
-	asm volatile (							\
-		"call %[call]"						\
-		: "=a" (__res)						\
-		: [call] "m" (hypercall_page[__HYPERVISOR_##name])	\
-		: "memory" );						\
+	__HYPERCALL_DECLS;						\
+	__HYPERCALL_0ARG();						\
+	asm volatile (__HYPERCALL					\
+		      : __HYPERCALL_0PARAM				\
+		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_CLOBBER0);				\
 	(type)__res;							\
 })
 
 #define _hypercall1(type, name, a1)					\
 ({									\
-	long __res, __ign1;						\
-	asm volatile (							\
-		"call %[call]"						\
-		: "=a" (__res), "=b" (__ign1)				\
-		: "1" ((long)(a1)),					\
-		  [call] "m" (hypercall_page[__HYPERVISOR_##name])	\
-		: "memory" );						\
+	__HYPERCALL_DECLS;						\
+	__HYPERCALL_1ARG(a1);						\
+	asm volatile (__HYPERCALL					\
+		      : __HYPERCALL_1PARAM				\
+		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_CLOBBER1);				\
 	(type)__res;							\
 })
 
 #define _hypercall2(type, name, a1, a2)					\
 ({									\
-	long __res, __ign1, __ign2;					\
-	asm volatile (							\
-		"call %[call]"						\
-		: "=a" (__res), "=b" (__ign1), "=c" (__ign2)		\
-		: "1" ((long)(a1)), "2" ((long)(a2)),			\
-		  [call] "m" (hypercall_page[__HYPERVISOR_##name])	\
-		: "memory" );						\
+	__HYPERCALL_DECLS;						\
+	__HYPERCALL_2ARG(a1, a2);					\
+	asm volatile (__HYPERCALL					\
+		      : __HYPERCALL_2PARAM				\
+		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_CLOBBER2);				\
 	(type)__res;							\
 })
 
 #define _hypercall3(type, name, a1, a2, a3)				\
 ({									\
-	long __res, __ign1, __ign2, __ign3;				\
-	asm volatile (							\
-		"call %[call]"						\
-		: "=a" (__res), "=b" (__ign1), "=c" (__ign2),		\
-		"=d" (__ign3)						\
-		: "1" ((long)(a1)), "2" ((long)(a2)),			\
-		  "3" ((long)(a3)),					\
-		  [call] "m" (hypercall_page[__HYPERVISOR_##name])	\
-		: "memory" );						\
+	__HYPERCALL_DECLS;						\
+	__HYPERCALL_3ARG(a1, a2, a3);					\
+	asm volatile (__HYPERCALL					\
+		      : __HYPERCALL_3PARAM				\
+		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_CLOBBER3);				\
 	(type)__res;							\
 })
 
 #define _hypercall4(type, name, a1, a2, a3, a4)				\
 ({									\
-	long __res, __ign1, __ign2, __ign3, __ign4;			\
-	asm volatile (							\
-		"call %[call]"						\
-		: "=a" (__res), "=b" (__ign1), "=c" (__ign2),		\
-		"=d" (__ign3), "=S" (__ign4)				\
-		: "1" ((long)(a1)), "2" ((long)(a2)),			\
-		  "3" ((long)(a3)), "4" ((long)(a4)),			\
-		  [call] "m" (hypercall_page[__HYPERVISOR_##name])	\
-		: "memory" );						\
+	__HYPERCALL_DECLS;						\
+	__HYPERCALL_4ARG(a1, a2, a3, a4);				\
+	asm volatile (__HYPERCALL					\
+		      : __HYPERCALL_4PARAM				\
+		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_CLOBBER4);				\
 	(type)__res;							\
 })
 
 #define _hypercall5(type, name, a1, a2, a3, a4, a5)			\
 ({									\
-	long __res, __ign1, __ign2, __ign3, __ign4, __ign5;		\
-	asm volatile (							\
-		"call %[call]"						\
-		: "=a" (__res), "=b" (__ign1), "=c" (__ign2),		\
-		"=d" (__ign3), "=S" (__ign4), "=D" (__ign5)		\
-		: "1" ((long)(a1)), "2" ((long)(a2)),			\
-		  "3" ((long)(a3)), "4" ((long)(a4)),			\
-		  "5" ((long)(a5)),					\
-		  [call] "m" (hypercall_page[__HYPERVISOR_##name])	\
-		: "memory" );						\
+	__HYPERCALL_DECLS;						\
+	__HYPERCALL_5ARG(a1, a2, a3, a4, a5);				\
+	asm volatile (__HYPERCALL					\
+		      : __HYPERCALL_5PARAM				\
+		      : __HYPERCALL_ENTRY(name)				\
+		      : __HYPERCALL_CLOBBER5);				\
 	(type)__res;							\
 })
 



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 16 of 55] xen64: add extra pv_mmu_ops
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (14 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 15 of 55] xen64: fix calls into hypercall page Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-09  7:55   ` Mark McLoughlin
  2008-07-08 22:06 ` [PATCH 17 of 55] xen64: random ifdefs to mask out 32-bit only code Jeremy Fitzhardinge
                   ` (39 subsequent siblings)
  55 siblings, 1 reply; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

We need extra pv_mmu_ops for 64-bit, to deal with the extra level of
pagetable.
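
For background, a sketch of the page walk the extra level implies
(standard 2.6.26-era accessors for kernel mappings; walk_demo() is
hypothetical and omits the pgd/pud/pmd presence checks a real walker
needs):

/* Sketch only: with PAGETABLE_LEVELS == 4 an address resolves
   pgd -> pud -> pmd -> pte, so the pud (and pgd-setting) ops need
   pv hooks too; on 32-bit the pud level is folded away. */
static pte_t *walk_demo(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pte_offset_kernel(pmd, addr);
}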

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c   |   33 +++++++++++++++++++++++++++-
 arch/x86/xen/mmu.c         |   51 +++++++++++++++++++++++++++++++++++++++++++-
 arch/x86/xen/mmu.h         |   15 +++++++++++-
 include/asm-x86/xen/page.h |    4 +++
 4 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -803,6 +803,18 @@
 	xen_release_ptpage(pfn, PT_PMD);
 }
 
+#if PAGETABLE_LEVELS == 4
+static void xen_alloc_pud(struct mm_struct *mm, u32 pfn)
+{
+	xen_alloc_ptpage(mm, pfn, PT_PUD);
+}
+
+static void xen_release_pud(u32 pfn)
+{
+	xen_release_ptpage(pfn, PT_PUD);
+}
+#endif
+
 #ifdef CONFIG_HIGHPTE
 static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
 {
@@ -922,6 +934,11 @@
 	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
 	pv_mmu_ops.release_pte = xen_release_pte;
 	pv_mmu_ops.release_pmd = xen_release_pmd;
+#if PAGETABLE_LEVELS == 4
+	pv_mmu_ops.alloc_pud = xen_alloc_pud;
+	pv_mmu_ops.release_pud = xen_release_pud;
+#endif
+
 	pv_mmu_ops.set_pte = xen_set_pte;
 
 	xen_setup_shared_info();
@@ -937,6 +954,9 @@
 {
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
+#if PAGETABLE_LEVELS == 4
+	pv_mmu_ops.set_pgd = xen_set_pgd;
+#endif
 
 	xen_mark_init_mm_pinned();
 }
@@ -1185,14 +1205,25 @@
 	.make_pte = xen_make_pte,
 	.make_pgd = xen_make_pgd,
 
+#ifdef CONFIG_X86_PAE
 	.set_pte_atomic = xen_set_pte_atomic,
 	.set_pte_present = xen_set_pte_at,
-	.set_pud = xen_set_pud_hyper,
 	.pte_clear = xen_pte_clear,
 	.pmd_clear = xen_pmd_clear,
+#endif	/* CONFIG_X86_PAE */
+	.set_pud = xen_set_pud_hyper,
 
 	.make_pmd = xen_make_pmd,
 	.pmd_val = xen_pmd_val,
+
+#if PAGETABLE_LEVELS == 4
+	.pud_val = xen_pud_val,
+	.make_pud = xen_make_pud,
+	.set_pgd = xen_set_pgd_hyper,
+
+	.alloc_pud = xen_alloc_pte_init,
+	.release_pud = xen_release_pte_init,
+#endif	/* PAGETABLE_LEVELS == 4 */
 
 	.activate_mm = xen_activate_mm,
 	.dup_mmap = xen_dup_mmap,
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -438,14 +438,19 @@
 
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
+#ifdef CONFIG_X86_PAE
 	ptep->pte_high = pte.pte_high;
 	smp_wmb();
 	ptep->pte_low = pte.pte_low;
+#else
+	*ptep = pte;
+#endif
 }
 
+#ifdef CONFIG_X86_PAE
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
-	set_64bit((u64 *)ptep, pte_val_ma(pte));
+	set_64bit((u64 *)ptep, native_pte_val(pte));
 }
 
 void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
@@ -459,12 +464,56 @@
 {
 	set_pmd(pmdp, __pmd(0));
 }
+#endif	/* CONFIG_X86_PAE */
 
 pmd_t xen_make_pmd(pmdval_t pmd)
 {
 	pmd = pte_pfn_to_mfn(pmd);
 	return native_make_pmd(pmd);
 }
+
+#if PAGETABLE_LEVELS == 4
+pudval_t xen_pud_val(pud_t pud)
+{
+	return pte_mfn_to_pfn(pud.pud);
+}
+
+pud_t xen_make_pud(pudval_t pud)
+{
+	pud = pte_pfn_to_mfn(pud);
+
+	return native_make_pud(pud);
+}
+
+void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
+	struct mmu_update u;
+
+	preempt_disable();
+
+	xen_mc_batch();
+
+	u.ptr = virt_to_machine(ptr).maddr;
+	u.val = pgd_val_ma(val);
+	extend_mmu_update(&u);
+
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+	preempt_enable();
+}
+
+void xen_set_pgd(pgd_t *ptr, pgd_t val)
+{
+	/* If page is not pinned, we can just update the entry
+	   directly */
+	if (!page_pinned(ptr)) {
+		*ptr = val;
+		return;
+	}
+
+	xen_set_pgd_hyper(ptr, val);
+}
+#endif	/* PAGETABLE_LEVELS == 4 */
 
 /*
   (Yet another) pagetable walker.  This one is intended for pinning a
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -32,13 +32,24 @@
 void xen_set_pte(pte_t *ptep, pte_t pteval);
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval);
+
+#ifdef CONFIG_X86_PAE
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte);
+void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+void xen_pmd_clear(pmd_t *pmdp);
+#endif	/* CONFIG_X86_PAE */
+
 void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
 void xen_set_pud(pud_t *ptr, pud_t val);
 void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval);
 void xen_set_pud_hyper(pud_t *ptr, pud_t val);
-void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
-void xen_pmd_clear(pmd_t *pmdp);
+
+#if PAGETABLE_LEVELS == 4
+pudval_t xen_pud_val(pud_t pud);
+pud_t xen_make_pud(pudval_t pudval);
+void xen_set_pgd(pgd_t *pgdp, pgd_t pgd);
+void xen_set_pgd_hyper(pgd_t *pgdp, pgd_t pgd);
+#endif
 
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 void  xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -148,7 +148,11 @@
 }
 
 #define pmd_val_ma(v) ((v).pmd)
+#ifdef __PAGETABLE_PUD_FOLDED
 #define pud_val_ma(v) ((v).pgd.pgd)
+#else
+#define pud_val_ma(v) ((v).pud)
+#endif
 #define __pmd_ma(x)	((pmd_t) { (x) } )
 
 #define pgd_val_ma(x)	((x).pgd)



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 17 of 55] xen64: random ifdefs to mask out 32-bit only code
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (15 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 16 of 55] xen64: add extra pv_mmu_ops Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 18 of 55] xen64: get active_mm from the pda Jeremy Fitzhardinge
                   ` (38 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |   15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1296,6 +1296,7 @@
 
 static void __init xen_reserve_top(void)
 {
+#ifdef CONFIG_X86_32
 	unsigned long top = HYPERVISOR_VIRT_START;
 	struct xen_platform_parameters pp;
 
@@ -1303,6 +1304,7 @@
 		top = pp.virt_start;
 
 	reserve_top_address(-top + 2 * PAGE_SIZE);
+#endif	/* CONFIG_X86_32 */
 }
 
 /* First C function to be called on Xen boot */
@@ -1333,6 +1335,11 @@
 
 	machine_ops = xen_machine_ops;
 
+#ifdef CONFIG_X86_64
+	/* Disable until direct per-cpu data access. */
+	have_vcpu_info_placement = 0;
+#endif
+
 #ifdef CONFIG_SMP
 	smp_ops = xen_smp_ops;
 #endif
@@ -1343,9 +1350,11 @@
 
 	pgd = (pgd_t *)xen_start_info->pt_base;
 
+#ifdef CONFIG_X86_32
 	init_pg_tables_start = __pa(pgd);
 	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
 	max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT;
+#endif
 
 	init_mm.pgd = pgd; /* use the Xen pagetables to start */
 
@@ -1372,7 +1381,9 @@
 
 	/* set up basic CPUID stuff */
 	cpu_detect(&new_cpu_data);
+#ifdef CONFIG_X86_32
 	new_cpu_data.hard_math = 1;
+#endif
 	new_cpu_data.x86_capability[0] = cpuid_edx(1);
 
 	/* Poke various useful things into boot_params */
@@ -1388,5 +1399,9 @@
 	}
 
 	/* Start the world */
+#ifdef CONFIG_X86_32
 	i386_start_kernel();
+#else
+	x86_64_start_kernel((char *)&boot_params);
+#endif
 }



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 18 of 55] xen64: get active_mm from the pda
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (16 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 17 of 55] xen64: random ifdefs to mask out 32-bit only code Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 19 of 55] xen: move smp setup into smp.c Jeremy Fitzhardinge
                   ` (37 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

x86_64 stores the active_mm in the pda, so fetch it from there.
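
(The same logic hoisted into a helper, purely for illustration;
xen_active_mm() is a hypothetical name, and the patch below
open-codes this at its single call site:

static inline struct mm_struct *xen_active_mm(void)
{
#ifdef CONFIG_X86_64
	return read_pda(active_mm);	/* pda field on 64-bit */
#else
	return __get_cpu_var(cpu_tlbstate).active_mm;
#endif
}
)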

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/mmu.c |    9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -805,8 +805,15 @@
 static void drop_other_mm_ref(void *info)
 {
 	struct mm_struct *mm = info;
+	struct mm_struct *active_mm;
 
-	if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
+#ifdef CONFIG_X86_64
+	active_mm = read_pda(active_mm);
+#else
+	active_mm = __get_cpu_var(cpu_tlbstate).active_mm;
+#endif
+
+	if (active_mm == mm)
 		leave_mm(smp_processor_id());
 
 	/* If this cpu still has a stale cr3 reference, then make sure



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 19 of 55] xen: move smp setup into smp.c
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (17 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 18 of 55] xen64: get active_mm from the pda Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 20 of 55] x86_64: add workaround for no %gs-based percpu Jeremy Fitzhardinge
                   ` (36 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Move all the smp_ops setup into smp.c, allowing a lot of things to
become static.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |   19 +------------------
 arch/x86/xen/smp.c       |   34 ++++++++++++++++++++++++++--------
 arch/x86/xen/xen-ops.h   |   13 +++++--------
 3 files changed, 32 insertions(+), 34 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1237,21 +1237,6 @@
 	.set_fixmap = xen_set_fixmap,
 };
 
-#ifdef CONFIG_SMP
-static const struct smp_ops xen_smp_ops __initdata = {
-	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
-	.smp_prepare_cpus = xen_smp_prepare_cpus,
-	.cpu_up = xen_cpu_up,
-	.smp_cpus_done = xen_smp_cpus_done,
-
-	.smp_send_stop = xen_smp_send_stop,
-	.smp_send_reschedule = xen_smp_send_reschedule,
-
-	.send_call_func_ipi = xen_smp_send_call_function_ipi,
-	.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
-};
-#endif	/* CONFIG_SMP */
-
 static void xen_reboot(int reason)
 {
 	struct sched_shutdown r = { .reason = reason };
@@ -1340,9 +1325,7 @@
 	have_vcpu_info_placement = 0;
 #endif
 
-#ifdef CONFIG_SMP
-	smp_ops = xen_smp_ops;
-#endif
+	xen_smp_init();
 
 	/* Get mfn list */
 	if (!xen_feature(XENFEAT_auto_translated_physmap))
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -152,7 +152,7 @@
 	}
 }
 
-void __init xen_smp_prepare_boot_cpu(void)
+static void __init xen_smp_prepare_boot_cpu(void)
 {
 	int cpu;
 
@@ -176,7 +176,7 @@
 	xen_setup_vcpu_info_placement();
 }
 
-void __init xen_smp_prepare_cpus(unsigned int max_cpus)
+static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned cpu;
 
@@ -276,7 +276,7 @@
 	return 0;
 }
 
-int __cpuinit xen_cpu_up(unsigned int cpu)
+static int __cpuinit xen_cpu_up(unsigned int cpu)
 {
 	struct task_struct *idle = idle_task(cpu);
 	int rc;
@@ -319,7 +319,7 @@
 	return 0;
 }
 
-void xen_smp_cpus_done(unsigned int max_cpus)
+static void xen_smp_cpus_done(unsigned int max_cpus)
 {
 }
 
@@ -335,12 +335,12 @@
 	BUG();
 }
 
-void xen_smp_send_stop(void)
+static void xen_smp_send_stop(void)
 {
 	smp_call_function(stop_self, NULL, 0);
 }
 
-void xen_smp_send_reschedule(int cpu)
+static void xen_smp_send_reschedule(int cpu)
 {
 	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
 }
@@ -355,7 +355,7 @@
 		xen_send_IPI_one(cpu, vector);
 }
 
-void xen_smp_send_call_function_ipi(cpumask_t mask)
+static void xen_smp_send_call_function_ipi(cpumask_t mask)
 {
 	int cpu;
 
@@ -370,7 +370,7 @@
 	}
 }
 
-void xen_smp_send_call_function_single_ipi(int cpu)
+static void xen_smp_send_call_function_single_ipi(int cpu)
 {
 	xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
 }
@@ -394,3 +394,21 @@
 
 	return IRQ_HANDLED;
 }
+
+static const struct smp_ops xen_smp_ops __initdata = {
+	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
+	.smp_prepare_cpus = xen_smp_prepare_cpus,
+	.cpu_up = xen_cpu_up,
+	.smp_cpus_done = xen_smp_cpus_done,
+
+	.smp_send_stop = xen_smp_send_stop,
+	.smp_send_reschedule = xen_smp_send_reschedule,
+
+	.send_call_func_ipi = xen_smp_send_call_function_ipi,
+	.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
+};
+
+void __init xen_smp_init(void)
+{
+	smp_ops = xen_smp_ops;
+}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -47,17 +47,14 @@
 void __init xen_fill_possible_map(void);
 
 void __init xen_setup_vcpu_info_placement(void);
-void xen_smp_prepare_boot_cpu(void);
-void xen_smp_prepare_cpus(unsigned int max_cpus);
-int xen_cpu_up(unsigned int cpu);
-void xen_smp_cpus_done(unsigned int max_cpus);
 
-void xen_smp_send_stop(void);
-void xen_smp_send_reschedule(int cpu);
-void xen_smp_send_call_function_ipi(cpumask_t mask);
-void xen_smp_send_call_function_single_ipi(int cpu);
+#ifdef CONFIG_SMP
+void xen_smp_init(void);
 
 extern cpumask_t xen_cpu_initialized_map;
+#else
+static inline void xen_smp_init(void) {}
+#endif
 
 
 /* Declare an asm function, along with symbols needed to make it



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 20 of 55] x86_64: add workaround for no %gs-based percpu
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (18 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 19 of 55] xen: move smp setup into smp.c Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 21 of 55] xen64: smp.c compile hacking Jeremy Fitzhardinge
                   ` (35 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

As a stopgap until Mike Travis's x86-64 gs-based percpu patches are
ready, provide workaround functions for x86_read/write_percpu for
Xen's use.

Specifically, this means that we can't really make use of vcpu
placement, because we can't use a single gs-based memory access to get
to vcpu fields.  So disable all that for now.
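
A sketch of what callers get from the workaround (demo_counter is a
hypothetical per-cpu variable, not from the patch):

/* Sketch only: safe against preemption, but no longer one
   %gs-relative instruction, so a read-modify-write like this is
   not atomic against interrupts; that is why vcpu_info placement
   has to stay off on 64-bit for now. */
DEFINE_PER_CPU(unsigned long, demo_counter);

static void bump_demo_counter(void)
{
	unsigned long v = x86_read_percpu(demo_counter);

	x86_write_percpu(demo_counter, v + 1);
}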

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/kernel/head64.c |   11 ++++++++---
 arch/x86/xen/enlighten.c |    5 +++++
 include/asm-x86/percpu.h |   26 ++++++++++++++++++++++++++
 include/asm-x86/setup.h  |    1 +
 4 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -38,6 +38,13 @@
 #else
 static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
 #endif
+
+void __init x86_64_init_pda(void)
+{
+	_cpu_pda = __cpu_pda;
+	cpu_pda(0) = &_boot_cpu_pda;
+	pda_init(0);
+}
 
 static void __init zap_identity_mappings(void)
 {
@@ -102,9 +109,7 @@
 
 	early_printk("Kernel alive\n");
 
-	_cpu_pda = __cpu_pda;
-	cpu_pda(0) = &_boot_cpu_pda;
-	pda_init(0);
+	x86_64_init_pda();
 
 	early_printk("Kernel really alive\n");
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -971,6 +971,7 @@
 
 	/* xen_vcpu_setup managed to place the vcpu_info within the
 	   percpu area for all cpus, so make use of it */
+#ifdef CONFIG_X86_32
 	if (have_vcpu_info_placement) {
 		printk(KERN_INFO "Xen: using vcpu_info placement\n");
 
@@ -980,6 +981,7 @@
 		pv_irq_ops.irq_enable = xen_irq_enable_direct;
 		pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
 	}
+#endif
 }
 
 static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
@@ -1000,10 +1002,12 @@
 	goto patch_site
 
 	switch (type) {
+#ifdef CONFIG_X86_32
 		SITE(pv_irq_ops, irq_enable);
 		SITE(pv_irq_ops, irq_disable);
 		SITE(pv_irq_ops, save_fl);
 		SITE(pv_irq_ops, restore_fl);
+#endif /* CONFIG_X86_32 */
 #undef SITE
 
 	patch_site:
@@ -1323,6 +1327,7 @@
 #ifdef CONFIG_X86_64
 	/* Disable until direct per-cpu data access. */
 	have_vcpu_info_placement = 0;
+	x86_64_init_pda();
 #endif
 
 	xen_smp_init();
diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h
--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h
@@ -21,6 +21,32 @@
 #include <asm-generic/percpu.h>
 
 DECLARE_PER_CPU(struct x8664_pda, pda);
+
+/*
+ * These are supposed to be implemented as a single instruction which
+ * operates on the per-cpu data base segment.  x86-64 doesn't have
+ * that yet, so this is a fairly inefficient workaround for the
+ * meantime.  The single instruction is atomic with respect to
+ * preemption and interrupts, so we need to explicitly disable
+ * interrupts here to achieve the same effect.  However, because it
+ * can be used from within interrupt-disable/enable, we can't actually
+ * disable interrupts; disabling preemption is enough.
+ */
+#define x86_read_percpu(var)						\
+	({								\
+		typeof(per_cpu_var(var)) __tmp;				\
+		preempt_disable();					\
+		__tmp = __get_cpu_var(var);				\
+		preempt_enable();					\
+		__tmp;							\
+	})
+
+#define x86_write_percpu(var, val)					\
+	do {								\
+		preempt_disable();					\
+		__get_cpu_var(var) = (val);				\
+		preempt_enable();					\
+	} while(0)
 
 #else /* CONFIG_X86_64 */
 
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -57,6 +57,7 @@
 extern unsigned long init_pg_tables_end;
 
 #else
+void __init x86_64_init_pda(void);
 void __init x86_64_start_kernel(char *real_mode);
 void __init x86_64_start_reservations(char *real_mode_data);
 



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 21 of 55] xen64: smp.c compile hacking
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (19 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 20 of 55] x86_64: add workaround for no %gs-based percpu Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 22 of 55] xen64: add xen-head code to head_64.S Jeremy Fitzhardinge
                   ` (34 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

A number of random changes to make xen/smp.c compile in 64-bit mode.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/setup.c   |    7 ---
 arch/x86/xen/smp.c     |   98 +++++++++++++++++++++++++++---------------------
 arch/x86/xen/xen-ops.h |    2
 3 files changed, 58 insertions(+), 49 deletions(-)

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -98,7 +98,7 @@
 	/* Mask events on entry, even though they get enabled immediately */
 	static struct callback_register sysenter = {
 		.type = CALLBACKTYPE_sysenter,
-		.address = { __KERNEL_CS, (unsigned long)xen_sysenter_target },
+		.address = XEN_CALLBACK(__KERNEL_CS, xen_sysenter_target),
 		.flags = CALLBACKF_mask_events,
 	};
 
@@ -143,11 +143,6 @@
 
 	pm_idle = xen_idle;
 
-#ifdef CONFIG_SMP
-	/* fill cpus_possible with all available cpus */
-	xen_fill_possible_map();
-#endif
-
 	paravirt_disable_iospace();
 
 	fiddle_vdso();
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -66,12 +66,20 @@
 	int cpu = smp_processor_id();
 
 	cpu_init();
+	preempt_disable();
+
 	xen_enable_sysenter();
 
-	preempt_disable();
-	per_cpu(cpu_state, cpu) = CPU_ONLINE;
+	cpu = smp_processor_id();
+	smp_store_cpu_info(cpu);
+	cpu_data(cpu).x86_max_cores = 1;
+	set_cpu_sibling_map(cpu);
 
 	xen_setup_cpu_clockevents();
+
+	cpu_set(cpu, cpu_online_map);
+	x86_write_percpu(cpu_state, CPU_ONLINE);
+	wmb();
 
 	/* We can take interrupts now: we're officially "up". */
 	local_irq_enable();
@@ -141,7 +149,7 @@
 	return rc;
 }
 
-void __init xen_fill_possible_map(void)
+static void __init xen_fill_possible_map(void)
 {
 	int i, rc;
 
@@ -154,24 +162,12 @@
 
 static void __init xen_smp_prepare_boot_cpu(void)
 {
-	int cpu;
-
 	BUG_ON(smp_processor_id() != 0);
 	native_smp_prepare_boot_cpu();
 
 	/* We've switched to the "real" per-cpu gdt, so make sure the
 	   old memory can be recycled */
-	make_lowmem_page_readwrite(&per_cpu__gdt_page);
-
-	for_each_possible_cpu(cpu) {
-		cpus_clear(per_cpu(cpu_sibling_map, cpu));
-		/*
-		 * cpu_core_map lives in a per cpu area that is cleared
-		 * when the per cpu array is allocated.
-		 *
-		 * cpus_clear(per_cpu(cpu_core_map, cpu));
-		 */
-	}
+	make_lowmem_page_readwrite(&per_cpu_var(gdt_page));
 
 	xen_setup_vcpu_info_placement();
 }
@@ -180,17 +176,8 @@
 {
 	unsigned cpu;
 
-	for_each_possible_cpu(cpu) {
-		cpus_clear(per_cpu(cpu_sibling_map, cpu));
-		/*
-		 * cpu_core_ map will be zeroed when the per
-		 * cpu area is allocated.
-		 *
-		 * cpus_clear(per_cpu(cpu_core_map, cpu));
-		 */
-	}
-
 	smp_store_cpu_info(0);
+	cpu_data(0).x86_max_cores = 1;
 	set_cpu_sibling_map(0);
 
 	if (xen_smp_intr_init(0))
@@ -225,7 +212,7 @@
 cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 {
 	struct vcpu_guest_context *ctxt;
-	struct gdt_page *gdt = &per_cpu(gdt_page, cpu);
+	struct desc_struct *gdt;
 
 	if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
 		return 0;
@@ -234,12 +221,15 @@
 	if (ctxt == NULL)
 		return -ENOMEM;
 
+	gdt = get_cpu_gdt_table(cpu);
+
 	ctxt->flags = VGCF_IN_KERNEL;
 	ctxt->user_regs.ds = __USER_DS;
 	ctxt->user_regs.es = __USER_DS;
+	ctxt->user_regs.ss = __KERNEL_DS;
+#ifdef CONFIG_X86_32
 	ctxt->user_regs.fs = __KERNEL_PERCPU;
-	ctxt->user_regs.gs = 0;
-	ctxt->user_regs.ss = __KERNEL_DS;
+#endif
 	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
 	ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
 
@@ -249,11 +239,11 @@
 
 	ctxt->ldt_ents = 0;
 
-	BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK);
-	make_lowmem_page_readonly(gdt->gdt);
+	BUG_ON((unsigned long)gdt & ~PAGE_MASK);
+	make_lowmem_page_readonly(gdt);
 
-	ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt);
-	ctxt->gdt_ents      = ARRAY_SIZE(gdt->gdt);
+	ctxt->gdt_frames[0] = virt_to_mfn(gdt);
+	ctxt->gdt_ents      = GDT_ENTRIES;
 
 	ctxt->user_regs.cs = __KERNEL_CS;
 	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
@@ -261,9 +251,11 @@
 	ctxt->kernel_ss = __KERNEL_DS;
 	ctxt->kernel_sp = idle->thread.sp0;
 
+#ifdef CONFIG_X86_32
 	ctxt->event_callback_cs     = __KERNEL_CS;
+	ctxt->failsafe_callback_cs  = __KERNEL_CS;
+#endif
 	ctxt->event_callback_eip    = (unsigned long)xen_hypervisor_callback;
-	ctxt->failsafe_callback_cs  = __KERNEL_CS;
 	ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
 
 	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
@@ -287,10 +279,27 @@
 		return rc;
 #endif
 
+#ifdef CONFIG_X86_64
+	/* Allocate node local memory for AP pdas */
+	WARN_ON(cpu == 0);
+	if (cpu > 0) {
+		rc = get_local_pda(cpu);
+		if (rc)
+			return rc;
+	}
+#endif
+
+#ifdef CONFIG_X86_32
 	init_gdt(cpu);
 	per_cpu(current_task, cpu) = idle;
 	irq_ctx_init(cpu);
+#else
+	cpu_pda(cpu)->pcurrent = idle;
+	clear_tsk_thread_flag(idle, TIF_FORK);
+#endif
 	xen_setup_timer(cpu);
+
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 
 	/* make sure interrupts start blocked */
 	per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
@@ -306,15 +315,13 @@
 	if (rc)
 		return rc;
 
-	smp_store_cpu_info(cpu);
-	set_cpu_sibling_map(cpu);
-	/* This must be done before setting cpu_online_map */
-	wmb();
-
-	cpu_set(cpu, cpu_online_map);
-
 	rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
 	BUG_ON(rc);
+
+	while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
+		HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+		barrier();
+	}
 
 	return 0;
 }
@@ -379,7 +386,11 @@
 {
 	irq_enter();
 	generic_smp_call_function_interrupt();
+#ifdef CONFIG_X86_32
 	__get_cpu_var(irq_stat).irq_call_count++;
+#else
+	add_pda(irq_call_count, 1);
+#endif
 	irq_exit();
 
 	return IRQ_HANDLED;
@@ -389,7 +400,11 @@
 {
 	irq_enter();
 	generic_smp_call_function_single_interrupt();
+#ifdef CONFIG_X86_32
 	__get_cpu_var(irq_stat).irq_call_count++;
+#else
+	add_pda(irq_call_count, 1);
+#endif
 	irq_exit();
 
 	return IRQ_HANDLED;
@@ -411,4 +426,5 @@
 void __init xen_smp_init(void)
 {
 	smp_ops = xen_smp_ops;
+	xen_fill_possible_map();
 }
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -44,8 +44,6 @@
 
 void xen_mark_init_mm_pinned(void);
 
-void __init xen_fill_possible_map(void);
-
 void __init xen_setup_vcpu_info_placement(void);
 
 #ifdef CONFIG_SMP



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 22 of 55] xen64: add xen-head code to head_64.S
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (20 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 21 of 55] xen64: smp.c compile hacking Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 23 of 55] xen64: add asm-offsets Jeremy Fitzhardinge
                   ` (33 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Add the Xen entrypoint and ELF notes to head_64.S, and adapt
xen-head.S so it compiles as either 32-bit or 64-bit.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/kernel/asm-offsets_64.c |    3 +++
 arch/x86/kernel/head_64.S        |    1 +
 arch/x86/xen/xen-head.S          |   15 +++++++++++++--
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -131,5 +131,8 @@
 	OFFSET(BP_loadflags, boot_params, hdr.loadflags);
 	OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
 	OFFSET(BP_version, boot_params, hdr.version);
+
+	BLANK();
+	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
 	return 0;
 }
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -401,6 +401,7 @@
 	/* This must match the first entry in level2_kernel_pgt */
 	.quad   0x0000000000000000
 
+#include "../../x86/xen/xen-head.S"
 	
 	.section .bss, "aw", @nobits
 	.align L1_CACHE_BYTES
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -8,15 +8,21 @@
 
 #include <asm/boot.h>
 #include <asm/asm.h>
+#include <asm/page.h>
 
 #include <xen/interface/elfnote.h>
 #include <asm/xen/interface.h>
 
 	__INIT
 ENTRY(startup_xen)
-	movl %esi,xen_start_info
 	cld
-	movl $(init_thread_union+THREAD_SIZE),%esp
+#ifdef CONFIG_X86_32
+	mov %esi,xen_start_info
+	mov $init_thread_union+THREAD_SIZE,%esp
+#else
+	mov %rsi,xen_start_info
+	mov $init_thread_union+THREAD_SIZE,%rsp
+#endif
 	jmp xen_start_kernel
 
 	__FINIT
@@ -30,7 +36,11 @@
 	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")
 	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz "2.6")
 	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz "xen-3.0")
+#ifdef CONFIG_X86_32
 	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      _ASM_PTR __PAGE_OFFSET)
+#else
+	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      _ASM_PTR __START_KERNEL_map)
+#endif
 	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          _ASM_PTR startup_xen)
 	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz "!writable_page_tables|pae_pgdir_above_4gb")
@@ -40,5 +50,6 @@
 		.quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
 	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
 	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   _ASM_PTR __HYPERVISOR_VIRT_START)
+	ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   _ASM_PTR 0)
 
 #endif /*CONFIG_XEN */



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 23 of 55] xen64: add asm-offsets
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (21 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 22 of 55] xen64: add xen-head code to head_64.S Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 24 of 55] xen64: add 64-bit assembler Jeremy Fitzhardinge
                   ` (32 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Add Xen vcpu_info offsets to asm-offsets_64.
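
For readers unfamiliar with asm-offsets: these OFFSET()/DEFINE()
entries never run; the file is compiled to assembly and kbuild
scrapes the result to generate asm-offsets.h.  The macro shapes,
reproduced from memory of include/linux/kbuild.h:

#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
#define OFFSET(sym, str, mem) \
	DEFINE(sym, offsetof(struct str, mem))

So the hunk below lets xen-asm_64.S refer to XEN_vcpu_info_mask and
XEN_vcpu_info_pending as plain assembler constants.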

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/kernel/asm-offsets_64.c |    8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -17,6 +17,8 @@
 #include <asm/thread_info.h>
 #include <asm/ia32.h>
 #include <asm/bootparam.h>
+
+#include <xen/interface/xen.h>
 
 #define __NO_STUBS 1
 #undef __SYSCALL
@@ -134,5 +136,11 @@
 
 	BLANK();
 	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
+#ifdef CONFIG_XEN
+	BLANK();
+	OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
+	OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+#undef ENTRY
+#endif
 	return 0;
 }



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 24 of 55] xen64: add 64-bit assembler
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (22 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 23 of 55] xen64: add asm-offsets Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 25 of 55] xen64: use set_fixmap for shared_info structure Jeremy Fitzhardinge
                   ` (31 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Split xen-asm into 32- and 64-bit files, and implement the 64-bit
variants.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/Makefile     |    2
 arch/x86/xen/xen-asm.S    |  305 ---------------------------------------------
 arch/x86/xen/xen-asm_32.S |  305 +++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/xen/xen-asm_64.S |  141 ++++++++++++++++++++
 4 files changed, 447 insertions(+), 306 deletions(-)

diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o \
-			time.o xen-asm.o grant-table.o suspend.o
+			time.o xen-asm_$(BITS).o grant-table.o suspend.o
 
 obj-$(CONFIG_SMP)	+= smp.o
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
deleted file mode 100644
--- a/arch/x86/xen/xen-asm.S
+++ /dev/null
@@ -1,305 +0,0 @@
-/*
-	Asm versions of Xen pv-ops, suitable for either direct use or inlining.
-	The inline versions are the same as the direct-use versions, with the
-	pre- and post-amble chopped off.
-
-	This code is encoded for size rather than absolute efficiency,
-	with a view to being able to inline as much as possible.
-
-	We only bother with direct forms (ie, vcpu in pda) of the operations
-	here; the indirect forms are better handled in C, since they're
-	generally too large to inline anyway.
- */
-
-#include <linux/linkage.h>
-
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/percpu.h>
-#include <asm/processor-flags.h>
-#include <asm/segment.h>
-
-#include <xen/interface/xen.h>
-
-#define RELOC(x, v)	.globl x##_reloc; x##_reloc=v
-#define ENDPATCH(x)	.globl x##_end; x##_end=.
-
-/* Pseudo-flag used for virtual NMI, which we don't implement yet */
-#define XEN_EFLAGS_NMI	0x80000000
-
-/*
-	Enable events.  This clears the event mask and tests the pending
-	event status with a single 'and' operation.  If there are pending
-	events, then enter the hypervisor to get them handled.
- */
-ENTRY(xen_irq_enable_direct)
-	/* Unmask events */
-	movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
-
-	/* Preempt here doesn't matter because that will deal with
-	   any pending interrupts.  The pending check may end up being
-	   run on the wrong CPU, but that doesn't hurt. */
-
-	/* Test for pending */
-	testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
-	jz 1f
-
-2:	call check_events
-1:
-ENDPATCH(xen_irq_enable_direct)
-	ret
-	ENDPROC(xen_irq_enable_direct)
-	RELOC(xen_irq_enable_direct, 2b+1)
-
-
-/*
-	Disabling events is simply a matter of making the event mask
-	non-zero.
- */
-ENTRY(xen_irq_disable_direct)
-	movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
-ENDPATCH(xen_irq_disable_direct)
-	ret
-	ENDPROC(xen_irq_disable_direct)
-	RELOC(xen_irq_disable_direct, 0)
-
-/*
-	(xen_)save_fl is used to get the current interrupt enable status.
-	Callers expect the status to be in X86_EFLAGS_IF, and other bits
-	may be set in the return value.  We take advantage of this by
-	making sure that X86_EFLAGS_IF has the right value (and other bits
-	in that byte are 0), but other bits in the return value are
-	undefined.  We need to toggle the state of the bit, because
-	Xen and x86 use opposite senses (mask vs enable).
- */
-ENTRY(xen_save_fl_direct)
-	testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
-	setz %ah
-	addb %ah,%ah
-ENDPATCH(xen_save_fl_direct)
-	ret
-	ENDPROC(xen_save_fl_direct)
-	RELOC(xen_save_fl_direct, 0)
-
-
-/*
-	In principle the caller should be passing us a value returned
-	from xen_save_fl_direct, but for robustness' sake we test only
-	the X86_EFLAGS_IF flag rather than the whole byte. After
-	setting the interrupt mask state, it checks for unmasked
-	pending events and enters the hypervisor to get them delivered
-	if so.
- */
-ENTRY(xen_restore_fl_direct)
-	testb $X86_EFLAGS_IF>>8, %ah
-	setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
-	/* Preempt here doesn't matter because that will deal with
-	   any pending interrupts.  The pending check may end up being
-	   run on the wrong CPU, but that doesn't hurt. */
-
-	/* check for unmasked and pending */
-	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
-	jz 1f
-2:	call check_events
-1:
-ENDPATCH(xen_restore_fl_direct)
-	ret
-	ENDPROC(xen_restore_fl_direct)
-	RELOC(xen_restore_fl_direct, 2b+1)
-
-/*
-	We can't use sysexit directly, because we're not running in ring0.
-	But we can easily fake it up using iret.  Assuming xen_sysexit
-	is jumped to with a standard stack frame, we can just strip it
-	back to a standard iret frame and use iret.
- */
-ENTRY(xen_sysexit)
-	movl PT_EAX(%esp), %eax			/* Shouldn't be necessary? */
-	orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
-	lea PT_EIP(%esp), %esp
-
-	jmp xen_iret
-ENDPROC(xen_sysexit)
-
-/*
-	This is run where a normal iret would be run, with the same stack setup:
-	      8: eflags
-	      4: cs
-	esp-> 0: eip
-
-	This attempts to make sure that any pending events are dealt
-	with on return to usermode, but there is a small window in
-	which an event can happen just before entering usermode.  If
-	the nested interrupt ends up setting one of the TIF_WORK_MASK
-	pending work flags, they will not be tested again before
-	returning to usermode. This means that a process can end up
-	with pending work, which will be unprocessed until the process
-	enters and leaves the kernel again, which could be an
-	unbounded amount of time.  This means that a pending signal or
-	reschedule event could be indefinitely delayed.
-
-	The fix is to notice a nested interrupt in the critical
-	window, and if one occurs, then fold the nested interrupt into
-	the current interrupt stack frame, and re-process it
-	iteratively rather than recursively.  This means that it will
-	exit via the normal path, and all pending work will be dealt
-	with appropriately.
-
-	Because the nested interrupt handler needs to deal with the
-	current stack state in whatever form it's in, we keep things
-	simple by only using a single register which is pushed/popped
-	on the stack.
- */
-ENTRY(xen_iret)
-	/* test eflags for special cases */
-	testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
-	jnz hyper_iret
-
-	push %eax
-	ESP_OFFSET=4	# bytes pushed onto stack
-
-	/* Store vcpu_info pointer for easy access.  Do it this
-	   way to avoid having to reload %fs */
-#ifdef CONFIG_SMP
-	GET_THREAD_INFO(%eax)
-	movl TI_cpu(%eax),%eax
-	movl __per_cpu_offset(,%eax,4),%eax
-	mov per_cpu__xen_vcpu(%eax),%eax
-#else
-	movl per_cpu__xen_vcpu, %eax
-#endif
-
-	/* check IF state we're restoring */
-	testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
-
-	/* Maybe enable events.  Once this happens we could get a
-	   recursive event, so the critical region starts immediately
-	   afterwards.  However, if that happens we don't end up
-	   resuming the code, so we don't have to be worried about
-	   being preempted to another CPU. */
-	setz XEN_vcpu_info_mask(%eax)
-xen_iret_start_crit:
-
-	/* check for unmasked and pending */
-	cmpw $0x0001, XEN_vcpu_info_pending(%eax)
-
-	/* If there's something pending, mask events again so we
-	   can jump back into xen_hypervisor_callback */
-	sete XEN_vcpu_info_mask(%eax)
-
-	popl %eax
-
-	/* From this point on the registers are restored and the stack
-	   updated, so we don't need to worry about it if we're preempted */
-iret_restore_end:
-
-	/* Jump to hypervisor_callback after fixing up the stack.
-	   Events are masked, so jumping out of the critical
-	   region is OK. */
-	je xen_hypervisor_callback
-
-1:	iret
-xen_iret_end_crit:
-.section __ex_table,"a"
-	.align 4
-	.long 1b,iret_exc
-.previous
-
-hyper_iret:
-	/* put this out of line since it's very rarely used */
-	jmp hypercall_page + __HYPERVISOR_iret * 32
-
-	.globl xen_iret_start_crit, xen_iret_end_crit
-
-/*
-   This is called by xen_hypervisor_callback in entry.S when it sees
-   that the EIP at the time of interrupt was between xen_iret_start_crit
-   and xen_iret_end_crit.  We're passed the EIP in %eax so we can do
-   a more refined determination of what to do.
-
-   The stack format at this point is:
-	----------------
-	 ss		: (ss/esp may be present if we came from usermode)
-	 esp		:
-	 eflags		}  outer exception info
-	 cs		}
-	 eip		}
-	---------------- <- edi (copy dest)
-	 eax		:  outer eax if it hasn't been restored
-	----------------
-	 eflags		}  nested exception info
-	 cs		}   (no ss/esp because we're nested
-	 eip		}    from the same ring)
-	 orig_eax	}<- esi (copy src)
-	 - - - - - - - -
-	 fs		}
-	 es		}
-	 ds		}  SAVE_ALL state
-	 eax		}
-	  :		:
-	 ebx		}<- esp
-	----------------
-
-   In order to deliver the nested exception properly, we need to shift
-   everything from the return addr up to the error code so it
-   sits just under the outer exception info.  This means that when we
-   handle the exception, we do it in the context of the outer exception
-   rather than starting a new one.
-
-   The only caveat is that if the outer eax hasn't been
-   restored yet (ie, it's still on stack), we need to insert
-   its value into the SAVE_ALL state before going on, since
-   it's usermode state which we eventually need to restore.
- */
-ENTRY(xen_iret_crit_fixup)
-	/*
-	   Paranoia: Make sure we're really coming from kernel space.
-	   One could imagine a case where userspace jumps into the
-	   critical range address, but just before the CPU delivers a GP,
-	   it decides to deliver an interrupt instead.  Unlikely?
-	   Definitely.  Easy to avoid?  Yes.  The Intel documents
-	   explicitly say that the reported EIP for a bad jump is the
-	   jump instruction itself, not the destination, but some virtual
-	   environments get this wrong.
-	 */
-	movl PT_CS(%esp), %ecx
-	andl $SEGMENT_RPL_MASK, %ecx
-	cmpl $USER_RPL, %ecx
-	je 2f
-
-	lea PT_ORIG_EAX(%esp), %esi
-	lea PT_EFLAGS(%esp), %edi
-
-	/* If eip is before iret_restore_end then stack
-	   hasn't been restored yet. */
-	cmp $iret_restore_end, %eax
-	jae 1f
-
-	movl 0+4(%edi),%eax		/* copy EAX (just above top of frame) */
-	movl %eax, PT_EAX(%esp)
-
-	lea ESP_OFFSET(%edi),%edi	/* move dest up over saved regs */
-
-	/* set up the copy */
-1:	std
-	mov $PT_EIP / 4, %ecx		/* saved regs up to orig_eax */
-	rep movsl
-	cld
-
-	lea 4(%edi),%esp		/* point esp to new frame */
-2:	jmp xen_do_upcall
-
-
-/*
-	Force an event check by making a hypercall,
-	but preserve regs before making the call.
- */
-check_events:
-	push %eax
-	push %ecx
-	push %edx
-	call force_evtchn_callback
-	pop %edx
-	pop %ecx
-	pop %eax
-	ret
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
new file mode 100644
--- /dev/null
+++ b/arch/x86/xen/xen-asm_32.S
@@ -0,0 +1,305 @@
+/*
+	Asm versions of Xen pv-ops, suitable for either direct use or inlining.
+	The inline versions are the same as the direct-use versions, with the
+	pre- and post-amble chopped off.
+
+	This code is encoded for size rather than absolute efficiency,
+	with a view to being able to inline as much as possible.
+
+	We only bother with direct forms (ie, vcpu in pda) of the operations
+	here; the indirect forms are better handled in C, since they're
+	generally too large to inline anyway.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/percpu.h>
+#include <asm/processor-flags.h>
+#include <asm/segment.h>
+
+#include <xen/interface/xen.h>
+
+#define RELOC(x, v)	.globl x##_reloc; x##_reloc=v
+#define ENDPATCH(x)	.globl x##_end; x##_end=.
+
+/* Pseudo-flag used for virtual NMI, which we don't implement yet */
+#define XEN_EFLAGS_NMI	0x80000000
+
+/*
+	Enable events.  This clears the event mask and tests the pending
+	event status with a single 'and' operation.  If there are pending
+	events, then enter the hypervisor to get them handled.
+ */
+ENTRY(xen_irq_enable_direct)
+	/* Unmask events */
+	movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
+
+	/* Preempt here doesn't matter because that will deal with
+	   any pending interrupts.  The pending check may end up being
+	   run on the wrong CPU, but that doesn't hurt. */
+
+	/* Test for pending */
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
+	jz 1f
+
+2:	call check_events
+1:
+ENDPATCH(xen_irq_enable_direct)
+	ret
+	ENDPROC(xen_irq_enable_direct)
+	RELOC(xen_irq_enable_direct, 2b+1)
+
+
+/*
+	Disabling events is simply a matter of making the event mask
+	non-zero.
+ */
+ENTRY(xen_irq_disable_direct)
+	movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
+ENDPATCH(xen_irq_disable_direct)
+	ret
+	ENDPROC(xen_irq_disable_direct)
+	RELOC(xen_irq_disable_direct, 0)
+
+/*
+	(xen_)save_fl is used to get the current interrupt enable status.
+	Callers expect the status to be in X86_EFLAGS_IF, and other bits
+	may be set in the return value.  We take advantage of this by
+	making sure that X86_EFLAGS_IF has the right value (and other bits
+	in that byte are 0), but other bits in the return value are
+	undefined.  We need to toggle the state of the bit, because
+	Xen and x86 use opposite senses (mask vs enable).
+ */
+ENTRY(xen_save_fl_direct)
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
+	setz %ah
+	addb %ah,%ah
+ENDPATCH(xen_save_fl_direct)
+	ret
+	ENDPROC(xen_save_fl_direct)
+	RELOC(xen_save_fl_direct, 0)
+
+
+/*
+	In principle the caller should be passing us a value returned
+	from xen_save_fl_direct, but for robustness' sake we test only
+	the X86_EFLAGS_IF flag rather than the whole byte. After
+	setting the interrupt mask state, it checks for unmasked
+	pending events and enters the hypervisor to get them delivered
+	if so.
+ */
+ENTRY(xen_restore_fl_direct)
+	testb $X86_EFLAGS_IF>>8, %ah
+	setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
+	/* Preempt here doesn't matter because that will deal with
+	   any pending interrupts.  The pending check may end up being
+	   run on the wrong CPU, but that doesn't hurt. */
+
+	/* check for unmasked and pending */
+	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
+	jz 1f
+2:	call check_events
+1:
+ENDPATCH(xen_restore_fl_direct)
+	ret
+	ENDPROC(xen_restore_fl_direct)
+	RELOC(xen_restore_fl_direct, 2b+1)
+
+/*
+	We can't use sysexit directly, because we're not running in ring0.
+	But we can easily fake it up using iret.  Assuming xen_sysexit
+	is jumped to with a standard stack frame, we can just strip it
+	back to a standard iret frame and use iret.
+ */
+ENTRY(xen_sysexit)
+	movl PT_EAX(%esp), %eax			/* Shouldn't be necessary? */
+	orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
+	lea PT_EIP(%esp), %esp
+
+	jmp xen_iret
+ENDPROC(xen_sysexit)
+
+/*
+	This is run where a normal iret would be run, with the same stack setup:
+	      8: eflags
+	      4: cs
+	esp-> 0: eip
+
+	This attempts to make sure that any pending events are dealt
+	with on return to usermode, but there is a small window in
+	which an event can happen just before entering usermode.  If
+	the nested interrupt ends up setting one of the TIF_WORK_MASK
+	pending work flags, they will not be tested again before
+	returning to usermode. This means that a process can end up
+	with pending work, which will be unprocessed until the process
+	enters and leaves the kernel again, which could be an
+	unbounded amount of time.  This means that a pending signal or
+	reschedule event could be indefinitely delayed.
+
+	The fix is to notice a nested interrupt in the critical
+	window, and if one occurs, then fold the nested interrupt into
+	the current interrupt stack frame, and re-process it
+	iteratively rather than recursively.  This means that it will
+	exit via the normal path, and all pending work will be dealt
+	with appropriately.
+
+	Because the nested interrupt handler needs to deal with the
+	current stack state in whatever form it's in, we keep things
+	simple by only using a single register which is pushed/popped
+	on the stack.
+ */
+ENTRY(xen_iret)
+	/* test eflags for special cases */
+	testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
+	jnz hyper_iret
+
+	push %eax
+	ESP_OFFSET=4	# bytes pushed onto stack
+
+	/* Store vcpu_info pointer for easy access.  Do it this
+	   way to avoid having to reload %fs */
+#ifdef CONFIG_SMP
+	GET_THREAD_INFO(%eax)
+	movl TI_cpu(%eax),%eax
+	movl __per_cpu_offset(,%eax,4),%eax
+	mov per_cpu__xen_vcpu(%eax),%eax
+#else
+	movl per_cpu__xen_vcpu, %eax
+#endif
+
+	/* check IF state we're restoring */
+	testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
+
+	/* Maybe enable events.  Once this happens we could get a
+	   recursive event, so the critical region starts immediately
+	   afterwards.  However, if that happens we don't end up
+	   resuming the code, so we don't have to worry about
+	   being preempted to another CPU. */
+	setz XEN_vcpu_info_mask(%eax)
+xen_iret_start_crit:
+
+	/* check for unmasked and pending */
+	cmpw $0x0001, XEN_vcpu_info_pending(%eax)
+
+	/* If there's something pending, mask events again so we
+	   can jump back into xen_hypervisor_callback */
+	sete XEN_vcpu_info_mask(%eax)
+
+	popl %eax
+
+	/* From this point on the registers are restored and the stack
+	   updated, so we don't need to worry about it if we're preempted */
+iret_restore_end:
+
+	/* Jump to hypervisor_callback after fixing up the stack.
+	   Events are masked, so jumping out of the critical
+	   region is OK. */
+	je xen_hypervisor_callback
+
+1:	iret
+xen_iret_end_crit:
+.section __ex_table,"a"
+	.align 4
+	.long 1b,iret_exc
+.previous
+
+hyper_iret:
+	/* put this out of line since it's very rarely used */
+	jmp hypercall_page + __HYPERVISOR_iret * 32
+
+	.globl xen_iret_start_crit, xen_iret_end_crit
+
+/*
+   This is called by xen_hypervisor_callback in entry.S when it sees
+   that the EIP at the time of interrupt was between xen_iret_start_crit
+   and xen_iret_end_crit.  We're passed the EIP in %eax so we can do
+   a more refined determination of what to do.
+
+   The stack format at this point is:
+	----------------
+	 ss		: (ss/esp may be present if we came from usermode)
+	 esp		:
+	 eflags		}  outer exception info
+	 cs		}
+	 eip		}
+	---------------- <- edi (copy dest)
+	 eax		:  outer eax if it hasn't been restored
+	----------------
+	 eflags		}  nested exception info
+	 cs		}   (no ss/esp because we're nested
+	 eip		}    from the same ring)
+	 orig_eax	}<- esi (copy src)
+	 - - - - - - - -
+	 fs		}
+	 es		}
+	 ds		}  SAVE_ALL state
+	 eax		}
+	  :		:
+	 ebx		}<- esp
+	----------------
+
+   In order to deliver the nested exception properly, we need to shift
+   everything from the return addr up to the error code so it
+   sits just under the outer exception info.  This means that when we
+   handle the exception, we do it in the context of the outer exception
+   rather than starting a new one.
+
+   The only caveat is that if the outer eax hasn't been
+   restored yet (i.e., it's still on the stack), we need to insert
+   its value into the SAVE_ALL state before going on, since
+   it's usermode state which we eventually need to restore.
+ */
+ENTRY(xen_iret_crit_fixup)
+	/*
+	   Paranoia: Make sure we're really coming from kernel space.
+	   One could imagine a case where userspace jumps to an address
+	   in the critical range, but just before the CPU delivers a GP,
+	   it decides to deliver an interrupt instead.  Unlikely?
+	   Definitely.  Easy to avoid?  Yes.  The Intel documents
+	   explicitly say that the reported EIP for a bad jump is the
+	   jump instruction itself, not the destination, but some virtual
+	   environments get this wrong.
+	 */
+	movl PT_CS(%esp), %ecx
+	andl $SEGMENT_RPL_MASK, %ecx
+	cmpl $USER_RPL, %ecx
+	je 2f
+
+	lea PT_ORIG_EAX(%esp), %esi
+	lea PT_EFLAGS(%esp), %edi
+
+	/* If eip is before iret_restore_end then stack
+	   hasn't been restored yet. */
+	cmp $iret_restore_end, %eax
+	jae 1f
+
+	movl 0+4(%edi),%eax		/* copy EAX (just above top of frame) */
+	movl %eax, PT_EAX(%esp)
+
+	lea ESP_OFFSET(%edi),%edi	/* move dest up over saved regs */
+
+	/* set up the copy */
+1:	std
+	mov $PT_EIP / 4, %ecx		/* saved regs up to orig_eax */
+	rep movsl
+	cld
+
+	lea 4(%edi),%esp		/* point esp to new frame */
+2:	jmp xen_do_upcall
+
+
+/*
+	Force an event check by making a hypercall,
+	but preserve regs before making the call.
+ */
+check_events:
+	push %eax
+	push %ecx
+	push %edx
+	call force_evtchn_callback
+	pop %edx
+	pop %ecx
+	pop %eax
+	ret
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
new file mode 100644
--- /dev/null
+++ b/arch/x86/xen/xen-asm_64.S
@@ -0,0 +1,141 @@
+/*
+	Asm versions of Xen pv-ops, suitable for either direct use or inlining.
+	The inline versions are the same as the direct-use versions, with the
+	pre- and post-amble chopped off.
+
+	This code is encoded for size rather than absolute efficiency,
+	with a view to being able to inline as much as possible.
+
+	We only bother with direct forms (ie, vcpu in pda) of the operations
+	here; the indirect forms are better handled in C, since they're
+	generally too large to inline anyway.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
+
+#include <xen/interface/xen.h>
+
+#define RELOC(x, v)	.globl x##_reloc; x##_reloc=v
+#define ENDPATCH(x)	.globl x##_end; x##_end=.
+
+/* Pseudo-flag used for virtual NMI, which we don't implement yet */
+#define XEN_EFLAGS_NMI	0x80000000
+
+#if 0
+#include <asm/percpu.h>
+
+/*
+	Enable events.  This clears the event mask and then tests the
+	pending event status.  If there are pending events, enter the
+	hypervisor to get them handled.
+ */
+ENTRY(xen_irq_enable_direct)
+	/* Unmask events */
+	movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+
+	/* Preempt here doesn't matter because that will deal with
+	   any pending interrupts.  The pending check may end up being
+	   run on the wrong CPU, but that doesn't hurt. */
+
+	/* Test for pending */
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
+	jz 1f
+
+2:	call check_events
+1:
+ENDPATCH(xen_irq_enable_direct)
+	ret
+	ENDPROC(xen_irq_enable_direct)
+	RELOC(xen_irq_enable_direct, 2b+1)
+
+/*
+	Disabling events is simply a matter of making the event mask
+	non-zero.
+ */
+ENTRY(xen_irq_disable_direct)
+	movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+ENDPATCH(xen_irq_disable_direct)
+	ret
+	ENDPROC(xen_irq_disable_direct)
+	RELOC(xen_irq_disable_direct, 0)
+
+/*
+	(xen_)save_fl is used to get the current interrupt enable status.
+	Callers expect the status to be in X86_EFLAGS_IF, and other bits
+	may be set in the return value.  We take advantage of this by
+	making sure that X86_EFLAGS_IF has the right value (and other bits
+	in that byte are 0), but other bits in the return value are
+	undefined.  We need to toggle the state of the bit, because
+	Xen and x86 use opposite senses (mask vs enable).
+ */
+ENTRY(xen_save_fl_direct)
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+	setz %ah
+	addb %ah,%ah
+ENDPATCH(xen_save_fl_direct)
+	ret
+	ENDPROC(xen_save_fl_direct)
+	RELOC(xen_save_fl_direct, 0)
+
+/*
+	In principle the caller should be passing us a value returned
+	from xen_save_fl_direct, but for robustness' sake we test only
+	the X86_EFLAGS_IF flag rather than the whole byte.  After
+	setting the interrupt mask state, we check for unmasked
+	pending events and enter the hypervisor to get them delivered
+	if so.
+ */
+ENTRY(xen_restore_fl_direct)
+	testb $X86_EFLAGS_IF>>8, %ah
+	setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+	/* Preempt here doesn't matter because that will deal with
+	   any pending interrupts.  The pending check may end up being
+	   run on the wrong CPU, but that doesn't hurt. */
+
+	/* check for unmasked and pending */
+	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
+	jz 1f
+2:	call check_events
+1:
+ENDPATCH(xen_restore_fl_direct)
+	ret
+	ENDPROC(xen_restore_fl_direct)
+	RELOC(xen_restore_fl_direct, 2b+1)
+
+
+/*
+	Force an event check by making a hypercall,
+	but preserve regs before making the call.
+ */
+check_events:
+	push %rax
+	push %rcx
+	push %rdx
+	push %rsi
+	push %rdi
+	push %r8
+	push %r9
+	push %r10
+	push %r11
+	call force_evtchn_callback
+	pop %r11
+	pop %r10
+	pop %r9
+	pop %r8
+	pop %rdi
+	pop %rsi
+	pop %rdx
+	pop %rcx
+	pop %rax
+	ret
+#endif
+
+ENTRY(xen_iret)
+	pushq $0
+	jmp hypercall_page + __HYPERVISOR_iret * 32
+
+ENTRY(xen_sysexit)
+	ud2a
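
For readers following the flag handling above: in C, the direct forms
amount to roughly the following (an illustrative sketch, not kernel
code; the vcpu_info field names follow the Xen public interface
headers, and force_evtchn_callback is the helper that check_events
wraps):

	/* Sketch: Xen's mask has the opposite sense to X86_EFLAGS_IF */
	static unsigned long sketch_save_fl(struct vcpu_info *vcpu)
	{
		return vcpu->evtchn_upcall_mask ? 0 : X86_EFLAGS_IF;
	}

	static void sketch_restore_fl(struct vcpu_info *vcpu,
				      unsigned long flags)
	{
		vcpu->evtchn_upcall_mask = (flags & X86_EFLAGS_IF) ? 0 : 1;
		barrier();	/* unmask before testing for pending */
		if (!vcpu->evtchn_upcall_mask && vcpu->evtchn_upcall_pending)
			force_evtchn_callback();
	}

This also explains the setz/addb pair in xen_save_fl_direct: setz
leaves 1 in %ah when the mask byte is clear, and doubling it yields 2,
which is exactly X86_EFLAGS_IF (0x200) in the second byte of the
return value.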



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 25 of 55] xen64: use set_fixmap for shared_info structure
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (23 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 24 of 55] xen64: add 64-bit assembler Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 26 of 55] xen: cpu_detect is 32-bit only Jeremy Fitzhardinge
                   ` (30 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |   20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -902,18 +902,11 @@
 void xen_setup_shared_info(void)
 {
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
+		set_fixmap(FIX_PARAVIRT_BOOTMAP,
+			   xen_start_info->shared_info);
 
-		/*
-		 * Create a mapping for the shared info page.
-		 * Should be set_fixmap(), but shared_info is a machine
-		 * address with no corresponding pseudo-phys address.
-		 */
-		set_pte_mfn(addr,
-			    PFN_DOWN(xen_start_info->shared_info),
-			    PAGE_KERNEL);
-
-		HYPERVISOR_shared_info = (struct shared_info *)addr;
+		HYPERVISOR_shared_info =
+			(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
 	} else
 		HYPERVISOR_shared_info =
 			(struct shared_info *)__va(xen_start_info->shared_info);
@@ -1050,8 +1043,13 @@
 #ifdef CONFIG_X86_F00F_BUG
 	case FIX_F00F_IDT:
 #endif
+#ifdef CONFIG_X86_32
 	case FIX_WP_TEST:
 	case FIX_VDSO:
+	case FIX_KMAP_BEGIN ... FIX_KMAP_END:
+#else
+	case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+#endif
 #ifdef CONFIG_X86_LOCAL_APIC
 	case FIX_APIC_BASE:	/* maps dummy local APIC */
 #endif



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 26 of 55] xen: cpu_detect is 32-bit only
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (24 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 25 of 55] xen64: use set_fixmap for shared_info structure Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 27 of 55] xen64: add hypervisor callbacks for events, etc Jeremy Fitzhardinge
                   ` (29 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1365,12 +1365,12 @@
 	/* set the limit of our address space */
 	xen_reserve_top();
 
+#ifdef CONFIG_X86_32
 	/* set up basic CPUID stuff */
 	cpu_detect(&new_cpu_data);
-#ifdef CONFIG_X86_32
 	new_cpu_data.hard_math = 1;
+	new_cpu_data.x86_capability[0] = cpuid_edx(1);
 #endif
-	new_cpu_data.x86_capability[0] = cpuid_edx(1);
 
 	/* Poke various useful things into boot_params */
 	boot_params.hdr.type_of_loader = (9 << 4) | 0;



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 27 of 55] xen64: add hypervisor callbacks for events, etc
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (25 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 26 of 55] xen: cpu_detect is 32-bit only Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 28 of 55] xen64: early mapping setup Jeremy Fitzhardinge
                   ` (28 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/kernel/entry_64.S |   98 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1312,3 +1312,101 @@
 	sysret
 	CFI_ENDPROC
 ENDPROC(ignore_sysret)
+
+#ifdef CONFIG_XEN
+ENTRY(xen_hypervisor_callback)
+	zeroentry xen_do_hypervisor_callback
+END(xen_hypervisor_callback)
+
+/*
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until we've done all processing. HOWEVER, we must enable events before
+# popping the stack frame (can't be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so we'd
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+*/
+ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct pt_regs *)
+	CFI_STARTPROC
+/* Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *)
+   will see the correct pointer to the pt_regs */
+	movq %rdi, %rsp            # we don't return, adjust the stack frame
+	CFI_ENDPROC
+	CFI_DEFAULT_STACK
+11:	incl %gs:pda_irqcount
+	movq %rsp,%rbp
+	CFI_DEF_CFA_REGISTER rbp
+	cmovzq %gs:pda_irqstackptr,%rsp
+	pushq %rbp			# backlink for old unwinder
+	call xen_evtchn_do_upcall
+	popq %rsp
+	CFI_DEF_CFA_REGISTER rsp
+	decl %gs:pda_irqcount
+	jmp  error_exit
+	CFI_ENDPROC
+END(xen_do_hypervisor_callback)
+
+/*
+# Hypervisor uses this for application faults while it executes.
+# We get here for two reasons:
+#  1. Fault while reloading DS, ES, FS or GS
+#  2. Fault while executing IRET
+# Category 1 we do not need to fix up as Xen has already reloaded all segment
+# registers that could be reloaded and zeroed the others.
+# Category 2 we fix up by killing the current process. We cannot use the
+# normal Linux return path in this case because if we use the IRET hypercall
+# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
+# We distinguish between categories by comparing each saved segment register
+# with its current contents: any discrepancy means we are in category 1.
+*/
+ENTRY(xen_failsafe_callback)
+#if 1
+	ud2a
+#else
+	_frame (RIP-0x30)
+	CFI_REL_OFFSET rcx, 0
+	CFI_REL_OFFSET r11, 8
+	movw %ds,%cx
+	cmpw %cx,0x10(%rsp)
+	CFI_REMEMBER_STATE
+	jne 1f
+	movw %es,%cx
+	cmpw %cx,0x18(%rsp)
+	jne 1f
+	movw %fs,%cx
+	cmpw %cx,0x20(%rsp)
+	jne 1f
+	movw %gs,%cx
+	cmpw %cx,0x28(%rsp)
+	jne 1f
+	/* All segments match their saved values => Category 2 (Bad IRET). */
+	movq (%rsp),%rcx
+	CFI_RESTORE rcx
+	movq 8(%rsp),%r11
+	CFI_RESTORE r11
+	addq $0x30,%rsp
+	CFI_ADJUST_CFA_OFFSET -0x30
+	movq $11,%rdi	/* SIGSEGV */
+	jmp do_exit
+	CFI_RESTORE_STATE
+1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
+	movq (%rsp),%rcx
+	CFI_RESTORE rcx
+	movq 8(%rsp),%r11
+	CFI_RESTORE r11
+	addq $0x30,%rsp
+	CFI_ADJUST_CFA_OFFSET -0x30
+	pushq $0
+	CFI_ADJUST_CFA_OFFSET 8
+	SAVE_ALL
+	jmp error_exit
+	CFI_ENDPROC
+#endif
+END(xen_failsafe_callback)
+
+#endif /* CONFIG_XEN */
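
The critical-region test described in the comment above is, on the
32-bit side, an EIP range check against the xen_iret_start_crit and
xen_iret_end_crit labels before xen_iret_crit_fixup is invoked.  As a
C sketch (illustrative only; the helper name is made up):

	extern const char xen_iret_start_crit[], xen_iret_end_crit[];

	/* Did the event interrupt the iret critical region? */
	static int in_iret_crit(unsigned long ip)
	{
		return ip >= (unsigned long)xen_iret_start_crit &&
		       ip <  (unsigned long)xen_iret_end_crit;
	}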



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 28 of 55] xen64: early mapping setup
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (26 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 27 of 55] xen64: add hypervisor callbacks for events, etc Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 29 of 55] xen64: 64-bit starts using set_pte from very early Jeremy Fitzhardinge
                   ` (27 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Set up the initial pagetables to map the kernel mapping into the
physical mapping space.  This makes __va() usable, since it requires
physical mappings.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c     |  192 ++++++++++++++++++++++++++++++++++++++----
 include/asm-x86/pgtable_64.h |    2
 2 files changed, 178 insertions(+), 16 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -33,6 +33,7 @@
 #include <xen/interface/sched.h>
 #include <xen/features.h>
 #include <xen/page.h>
+#include <xen/hvc-console.h>
 
 #include <asm/paravirt.h>
 #include <asm/page.h>
@@ -1294,6 +1295,157 @@
 #endif	/* CONFIG_X86_32 */
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * Like __va(), but returns the address in the kernel mapping (which
+ * is all we have until the physical memory mapping has been set up).
+ */
+static void *__ka(phys_addr_t paddr)
+{
+	return (void *)(paddr + __START_KERNEL_map);
+}
+
+/* Convert a machine address to physical address */
+static unsigned long m2p(phys_addr_t maddr)
+{
+	phys_addr_t paddr;
+
+	maddr &= PTE_MASK;
+	paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
+
+	return paddr;
+}
+
+/* Convert a machine address to kernel virtual */
+static void *m2v(phys_addr_t maddr)
+{
+	return __ka(m2p(maddr));
+}
+
+static void walk(pgd_t *pgd, unsigned long addr)
+{
+	unsigned l4idx = pgd_index(addr);
+	unsigned l3idx = pud_index(addr);
+	unsigned l2idx = pmd_index(addr);
+	unsigned l1idx = pte_index(addr);
+	pgd_t l4;
+	pud_t l3;
+	pmd_t l2;
+	pte_t l1;
+
+	xen_raw_printk("walk %p, %lx -> %d %d %d %d\n",
+		       pgd, addr, l4idx, l3idx, l2idx, l1idx);
+
+	l4 = pgd[l4idx];
+	xen_raw_printk("  l4: %016lx\n", l4.pgd);
+	xen_raw_printk("      %016lx\n", pgd_val(l4));
+
+	l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx];
+	xen_raw_printk("  l3: %016lx\n", l3.pud);
+	xen_raw_printk("      %016lx\n", pud_val(l3));
+
+	l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx];
+	xen_raw_printk("  l2: %016lx\n", l2.pmd);
+	xen_raw_printk("      %016lx\n", pmd_val(l2));
+
+	l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx];
+	xen_raw_printk("  l1: %016lx\n", l1.pte);
+	xen_raw_printk("      %016lx\n", pte_val(l1));
+}
+
+static void set_page_prot(void *addr, pgprot_t prot)
+{
+	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
+	pte_t pte = pfn_pte(pfn, prot);
+
+	xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016x pte=%016x\n",
+		       addr, pfn, get_phys_to_machine(pfn),
+		       pgprot_val(prot), pte.pte);
+
+	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
+		BUG();
+}
+
+static void convert_pfn_mfn(void *v)
+{
+	pte_t *pte = v;
+	int i;
+
+	/* All levels are converted the same way, so just treat them
+	   as ptes. */
+	for(i = 0; i < PTRS_PER_PTE; i++)
+		pte[i] = xen_make_pte(pte[i].pte);
+}
+
+/*
+ * Set up the initial kernel pagetable.
+ *
+ * We can construct this by grafting the Xen provided pagetable into
+ * head_64.S's preconstructed pagetables.  We copy the Xen L2's into
+ * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt.  This
+ * means that only the kernel has a physical mapping to start with -
+ * but that's enough to get __va working.  We need to fill in the rest
+ * of the physical mapping once some sort of allocator has been set
+ * up.
+ */
+static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd)
+{
+	pud_t *l3;
+	pmd_t *l2;
+
+	/* Zap identity mapping */
+	init_level4_pgt[0] = __pgd(0);
+
+	/* Pre-constructed entries are in pfn, so convert to mfn */
+	convert_pfn_mfn(init_level4_pgt);
+	convert_pfn_mfn(level3_ident_pgt);
+	convert_pfn_mfn(level3_kernel_pgt);
+
+	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
+	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
+
+	memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+	memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+
+	l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
+	l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
+	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+
+	/* Make pagetable pieces RO */
+	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
+
+	/* Pin down new L4 */
+	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(init_level4_pgt)));
+
+	/* Unpin Xen-provided one */
+	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+
+	/* Switch over */
+	pgd = init_level4_pgt;
+	xen_write_cr3(__pa(pgd));
+
+	max_pfn_mapped = PFN_DOWN(__pa(pgd) +
+				  xen_start_info->nr_pt_frames*PAGE_SIZE +
+				  512*1024);
+
+	return pgd;
+}
+#else
+static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd)
+{
+	init_pg_tables_start = __pa(pgd);
+	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
+	max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
+
+	return pgd;
+}
+#endif	/* CONFIG_X86_64 */
+
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
 {
@@ -1336,31 +1488,28 @@
 
 	pgd = (pgd_t *)xen_start_info->pt_base;
 
-#ifdef CONFIG_X86_32
-	init_pg_tables_start = __pa(pgd);
-	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
-	max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT;
-#endif
+	/* Prevent unwanted bits from being set in PTEs. */
+	__supported_pte_mask &= ~_PAGE_GLOBAL;
+	if (!is_initial_xendomain())
+		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
 
-	init_mm.pgd = pgd; /* use the Xen pagetables to start */
+	/* Don't do the full vcpu_info placement stuff until we have a
+	   possible map and a non-dummy shared_info. */
+	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
+
+	xen_raw_console_write("mapping kernel into physical memory\n");
+	pgd = xen_setup_kernel_pagetable(pgd);
+
+	init_mm.pgd = pgd;
 
 	/* keep using Xen gdt for now; no urgent need to change it */
 
 	x86_write_percpu(xen_cr3, __pa(pgd));
 	x86_write_percpu(xen_current_cr3, __pa(pgd));
 
-	/* Don't do the full vcpu_info placement stuff until we have a
-	   possible map and a non-dummy shared_info. */
-	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
-
 	pv_info.kernel_rpl = 1;
 	if (xen_feature(XENFEAT_supervisor_mode_kernel))
 		pv_info.kernel_rpl = 0;
-
-	/* Prevent unwanted bits from being set in PTEs. */
-	__supported_pte_mask &= ~_PAGE_GLOBAL;
-	if (!is_initial_xendomain())
-		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
 
 	/* set the limit of our address space */
 	xen_reserve_top();
@@ -1384,10 +1533,21 @@
 		add_preferred_console("hvc", 0, NULL);
 	}
 
+	xen_raw_console_write("about to get started...\n");
+
+#if 0
+	xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n",
+		       &boot_params, __pa_symbol(&boot_params),
+		       __va(__pa_symbol(&boot_params)));
+
+	walk(pgd, (unsigned long)&boot_params);
+	walk(pgd, (unsigned long)__va(__pa(&boot_params)));
+#endif
+
 	/* Start the world */
 #ifdef CONFIG_X86_32
 	i386_start_kernel();
 #else
-	x86_64_start_kernel((char *)&boot_params);
+	x86_64_start_reservations((char *)__pa_symbol(&boot_params));
 #endif
 }
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -16,6 +16,8 @@
 extern pud_t level3_kernel_pgt[512];
 extern pud_t level3_ident_pgt[512];
 extern pmd_t level2_kernel_pgt[512];
+extern pmd_t level2_fixmap_pgt[512];
+extern pmd_t level2_ident_pgt[512];
 extern pgd_t init_level4_pgt[];
 
 #define swapper_pg_dir init_level4_pgt
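
To make the machine/pseudo-physical distinction concrete: following a
Xen-provided pgd entry down one level goes through the m2p
(machine-to-physical) table, roughly as below (an illustrative sketch
of what the m2v() helper in this patch does):

	/* Sketch: follow one Xen pgd entry down a level */
	pgd_t entry = pgd[pgd_index(__START_KERNEL_map)];
	phys_addr_t maddr = entry.pgd & PTE_MASK;	/* machine address */
	unsigned long pfn = mfn_to_pfn(maddr >> PAGE_SHIFT); /* m2p lookup */
	pud_t *l3 = __ka((phys_addr_t)pfn << PAGE_SHIFT); /* kernel-map alias */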



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 29 of 55] xen64: 64-bit starts using set_pte from very early
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (27 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 28 of 55] xen64: early mapping setup Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 30 of 55] xen64: map an initial chunk of physical memory Jeremy Fitzhardinge
                   ` (26 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

It also doesn't need the 32-bit hack version of set_pte for initial
pagetable construction, so just make it use the real thing.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |    4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1194,7 +1194,11 @@
 	.kmap_atomic_pte = xen_kmap_atomic_pte,
 #endif
 
+#ifdef CONFIG_X86_64
+	.set_pte = xen_set_pte,
+#else
 	.set_pte = xen_set_pte_init,
+#endif
 	.set_pte_at = xen_set_pte_at,
 	.set_pmd = xen_set_pmd_hyper,
 



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 30 of 55] xen64: map an initial chunk of physical memory
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (28 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 29 of 55] xen64: 64-bit starts using set_pte from very early Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 31 of 55] xen32: create initial mappings like 64-bit Jeremy Fitzhardinge
                   ` (25 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Early in boot, map a chunk of extra physical memory for use later on.
We need a pool of already-mapped pages from which to allocate the
pagetable pages used to construct the pagetables that map all of
physical memory.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |   79 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 69 insertions(+), 10 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1382,6 +1382,61 @@
 }
 
 /*
+ * Identity map, in addition to plain kernel map.  This needs to be
+ * large enough to allocate page table pages to allocate the rest.
+ * Each page can map 2MB.
+ */
+static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
+
+static __init void xen_map_identity_early(unsigned long max_pfn)
+{
+	unsigned pmdidx, pteidx;
+	unsigned ident_pte;
+	unsigned long pfn;
+
+	ident_pte = 0;
+	pfn = 0;
+	for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
+		pte_t *pte_page;
+
+		BUG_ON(level2_ident_pgt[pmdidx].pmd != level2_kernel_pgt[pmdidx].pmd);
+
+		/* Reuse or allocate a page of ptes */
+		if (pmd_present(level2_ident_pgt[pmdidx]))
+			pte_page = m2v(level2_ident_pgt[pmdidx].pmd);
+		else {
+			/* Check for free pte pages */
+			if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
+				break;
+
+			pte_page = &level1_ident_pgt[ident_pte];
+			ident_pte += PTRS_PER_PTE;
+
+			/* Install new l1 in l2(s) */
+			level2_ident_pgt[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
+			level2_kernel_pgt[pmdidx] = level2_ident_pgt[pmdidx];
+		}
+
+		/* Install mappings */
+		for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
+			pte_t pte;
+
+			if (pfn > max_pfn_mapped)
+				max_pfn_mapped = pfn;
+
+			if (!pte_none(pte_page[pteidx]))
+				continue;
+
+			pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
+			pte_page[pteidx] = pte;
+		}
+	}
+
+	for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
+		set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
+}
+
+/*
 * Set up the initial kernel pagetable.
  *
  * We can construct this by grafting the Xen provided pagetable into
@@ -1392,7 +1447,7 @@
  * of the physical mapping once some sort of allocator has been set
  * up.
  */
-static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd)
+static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
 	pud_t *l3;
 	pmd_t *l2;
@@ -1415,6 +1470,9 @@
 	l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
 	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
 
+	/* Set up identity map */
+	xen_map_identity_early(max_pfn);
+
 	/* Make pagetable pieces RO */
 	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
@@ -1424,7 +1482,7 @@
 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
 
 	/* Pin down new L4 */
-	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(init_level4_pgt)));
+	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa_symbol(init_level4_pgt)));
 
 	/* Unpin Xen-provided one */
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
@@ -1433,18 +1491,22 @@
 	pgd = init_level4_pgt;
 	xen_write_cr3(__pa(pgd));
 
-	max_pfn_mapped = PFN_DOWN(__pa(pgd) +
-				  xen_start_info->nr_pt_frames*PAGE_SIZE +
-				  512*1024);
+	reserve_early(__pa(xen_start_info->pt_base),
+		      __pa(xen_start_info->pt_base +
+			   xen_start_info->nr_pt_frames * PAGE_SIZE),
+		      "XEN PAGETABLES");
 
 	return pgd;
 }
 #else
-static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd)
+static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
 	init_pg_tables_start = __pa(pgd);
 	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
 	max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
+
+	x86_write_percpu(xen_cr3, __pa(pgd));
+	x86_write_percpu(xen_current_cr3, __pa(pgd));
 
 	return pgd;
 }
@@ -1502,14 +1564,11 @@
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
 
 	xen_raw_console_write("mapping kernel into physical memory\n");
-	pgd = xen_setup_kernel_pagetable(pgd);
+	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
 
 	init_mm.pgd = pgd;
 
 	/* keep using Xen gdt for now; no urgent need to change it */
-
-	x86_write_percpu(xen_cr3, __pa(pgd));
-	x86_write_percpu(xen_current_cr3, __pa(pgd));
 
 	pv_info.kernel_rpl = 1;
 	if (xen_feature(XENFEAT_supervisor_mode_kernel))
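
The sizing comment on level1_ident_pgt ("Each page can map 2MB") works
out as follows (a sketch of the arithmetic, not kernel code):

	/* one page of ptes: PTRS_PER_PTE (512) entries x 4KB each */
	unsigned long per_pte_page = PTRS_PER_PTE * PAGE_SIZE;	/* 2MB */
	/* the array reserves PTRS_PER_PTE * 4 entries = 4 pte pages */
	unsigned long early_limit = 4 * per_pte_page;		/* 8MB */

so the early identity map can cover at most 8MB before the loop runs
out of free pte pages and breaks out.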



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 31 of 55] xen32: create initial mappings like 64-bit
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (29 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 30 of 55] xen64: map an initial chunk of physical memory Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 32 of 55] xen: fix truncation of machine address Jeremy Fitzhardinge
                   ` (24 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Rearrange the pagetable initialization to share code with the 64-bit
kernel.  Rather than deferring anything to pagetable_setup_start, just
set up an initial pagetable in swapper_pg_dir early at startup, and
create an additional 8MB of physical memory mappings.  This matches
the native head_32.S mappings to a large degree, and allows the rest
of the pagetable setup to continue without much Xen vs. native
difference.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |  130 ++++++++++++++++++----------------------------
 1 file changed, 52 insertions(+), 78 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -854,50 +854,6 @@
 
 static __init void xen_pagetable_setup_start(pgd_t *base)
 {
-#ifdef CONFIG_X86_32
-	pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
-	int i;
-
-	init_mm.pgd = base;
-	/*
-	 * copy top-level of Xen-supplied pagetable into place.  This
-	 * is a stand-in while we copy the pmd pages.
-	 */
-	memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
-
-	/*
-	 * For PAE, need to allocate new pmds, rather than
-	 * share Xen's, since Xen doesn't like pmd's being
-	 * shared between address spaces.
-	 */
-	for (i = 0; i < PTRS_PER_PGD; i++) {
-		if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
-			pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
-
-			memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
-			       PAGE_SIZE);
-
-			make_lowmem_page_readonly(pmd);
-
-			set_pgd(&base[i], __pgd(1 + __pa(pmd)));
-		} else
-			pgd_clear(&base[i]);
-	}
-
-	/* make sure zero_page is mapped RO so we can use it in pagetables */
-	make_lowmem_page_readonly(empty_zero_page);
-	make_lowmem_page_readonly(base);
-	/*
-	 * Switch to new pagetable.  This is done before
-	 * pagetable_init has done anything so that the new pages
-	 * added to the table can be prepared properly for Xen.
-	 */
-	xen_write_cr3(__pa(base));
-
-	/* Unpin initial Xen pagetable */
-	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
-			  PFN_DOWN(__pa(xen_start_info->pt_base)));
-#endif	/* CONFIG_X86_32 */
 }
 
 void xen_setup_shared_info(void)
@@ -936,12 +892,6 @@
 	pv_mmu_ops.set_pte = xen_set_pte;
 
 	xen_setup_shared_info();
-
-#ifdef CONFIG_X86_32
-	/* Actually pin the pagetable down, but we can't set PG_pinned
-	   yet because the page structures don't exist yet. */
-	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
-#endif
 }
 
 static __init void xen_post_allocator_init(void)
@@ -1299,14 +1249,17 @@
 #endif	/* CONFIG_X86_32 */
 }
 
-#ifdef CONFIG_X86_64
 /*
  * Like __va(), but returns the address in the kernel mapping (which
  * is all we have until the physical memory mapping has been set up).
  */
 static void *__ka(phys_addr_t paddr)
 {
+#ifdef CONFIG_X86_64
 	return (void *)(paddr + __START_KERNEL_map);
+#else
+	return __va(paddr);
+#endif
 }
 
 /* Convert a machine address to physical address */
@@ -1326,6 +1279,7 @@
 	return __ka(m2p(maddr));
 }
 
+#ifdef CONFIG_X86_64
 static void walk(pgd_t *pgd, unsigned long addr)
 {
 	unsigned l4idx = pgd_index(addr);
@@ -1356,29 +1310,19 @@
 	xen_raw_printk("  l1: %016lx\n", l1.pte);
 	xen_raw_printk("      %016lx\n", pte_val(l1));
 }
+#endif
 
 static void set_page_prot(void *addr, pgprot_t prot)
 {
 	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
 	pte_t pte = pfn_pte(pfn, prot);
 
-	xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016x pte=%016x\n",
+	xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n",
 		       addr, pfn, get_phys_to_machine(pfn),
 		       pgprot_val(prot), pte.pte);
 
 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
 		BUG();
-}
-
-static void convert_pfn_mfn(void *v)
-{
-	pte_t *pte = v;
-	int i;
-
-	/* All levels are converted the same way, so just treat them
-	   as ptes. */
-	for(i = 0; i < PTRS_PER_PTE; i++)
-		pte[i] = xen_make_pte(pte[i].pte);
 }
 
 /*
@@ -1388,7 +1332,7 @@
  */
 static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
 
-static __init void xen_map_identity_early(unsigned long max_pfn)
+static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 {
 	unsigned pmdidx, pteidx;
 	unsigned ident_pte;
@@ -1399,11 +1343,9 @@
 	for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
 		pte_t *pte_page;
 
-		BUG_ON(level2_ident_pgt[pmdidx].pmd != level2_kernel_pgt[pmdidx].pmd);
-
 		/* Reuse or allocate a page of ptes */
-		if (pmd_present(level2_ident_pgt[pmdidx]))
-			pte_page = m2v(level2_ident_pgt[pmdidx].pmd);
+		if (pmd_present(pmd[pmdidx]))
+			pte_page = m2v(pmd[pmdidx].pmd);
 		else {
 			/* Check for free pte pages */
 			if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
@@ -1412,9 +1354,7 @@
 			pte_page = &level1_ident_pgt[ident_pte];
 			ident_pte += PTRS_PER_PTE;
 
-			/* Install new l1 in l2(s) */
-			level2_ident_pgt[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
-			level2_kernel_pgt[pmdidx] = level2_ident_pgt[pmdidx];
+			pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
 		}
 
 		/* Install mappings */
@@ -1434,6 +1374,20 @@
 
 	for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
 		set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
+
+	set_page_prot(pmd, PAGE_KERNEL_RO);
+}
+
+#ifdef CONFIG_X86_64
+static void convert_pfn_mfn(void *v)
+{
+	pte_t *pte = v;
+	int i;
+
+	/* All levels are converted the same way, so just treat them
+	   as ptes. */
+	for(i = 0; i < PTRS_PER_PTE; i++)
+		pte[i] = xen_make_pte(pte[i].pte);
 }
 
 /*
@@ -1471,18 +1425,18 @@
 	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
 
 	/* Set up identity map */
-	xen_map_identity_early(max_pfn);
+	xen_map_identity_early(level2_ident_pgt, max_pfn);
 
 	/* Make pagetable pieces RO */
 	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
 
 	/* Pin down new L4 */
-	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa_symbol(init_level4_pgt)));
+	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+			  PFN_DOWN(__pa_symbol(init_level4_pgt)));
 
 	/* Unpin Xen-provided one */
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
@@ -1498,17 +1452,37 @@
 
 	return pgd;
 }
-#else
+#else	/* !CONFIG_X86_64 */
+static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
+
 static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
+	pmd_t *kernel_pmd;
+
 	init_pg_tables_start = __pa(pgd);
 	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
 	max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
 
-	x86_write_percpu(xen_cr3, __pa(pgd));
-	x86_write_percpu(xen_current_cr3, __pa(pgd));
+	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
+	memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
 
-	return pgd;
+	xen_map_identity_early(level2_kernel_pgt, max_pfn);
+
+	memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+	set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
+			__pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
+
+	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
+	set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
+
+	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+
+	xen_write_cr3(__pa(swapper_pg_dir));
+
+	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
+
+	return swapper_pg_dir;
 }
 #endif	/* CONFIG_X86_64 */
 



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 32 of 55] xen: fix truncation of machine address
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (30 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 31 of 55] xen32: create initial mappings like 64-bit Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 33 of 55] xen64: use arbitrary_virt_to_machine for xen_set_pmd Jeremy Fitzhardinge
                   ` (23 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

arbitrary_virt_to_machine can truncate a machine address if it's above
4G.  Cast the problem away.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/mmu.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -186,7 +186,7 @@
 
 	BUG_ON(pte == NULL);
 
-	return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset);
+	return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset);
 }
 
 void make_lowmem_page_readonly(void *vaddr)
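
The bug is the classic shift-before-widen: pte_mfn() yields an
unsigned long, and on 32-bit the left shift overflows before anything
is added.  A minimal demonstration (illustrative values):

	unsigned long mfn = 0x123456;	/* frame above the 4GB boundary */
	unsigned long bad = mfn << PAGE_SHIFT;	/* truncated: 0x23456000 */
	phys_addr_t good = (phys_addr_t)mfn << PAGE_SHIFT; /* 0x123456000 */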



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 33 of 55] xen64: use arbitrary_virt_to_machine for xen_set_pmd
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (31 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 32 of 55] xen: fix truncation of machine address Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 34 of 55] xen: set num_processors Jeremy Fitzhardinge
                   ` (22 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

When building the initial pagetables in the 64-bit kernel, the pud/pmd pointer may
be in ioremap/fixmap space, so we need to walk the pagetable to look up the
physical address.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/mmu.c         |    9 ++++++---
 include/asm-x86/xen/page.h |    2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -178,8 +178,9 @@
 	p2m_top[topidx][idx] = mfn;
 }
 
-xmaddr_t arbitrary_virt_to_machine(unsigned long address)
+xmaddr_t arbitrary_virt_to_machine(void *vaddr)
 {
+	unsigned long address = (unsigned long)vaddr;
 	unsigned int level;
 	pte_t *pte = lookup_address(address, &level);
 	unsigned offset = address & ~PAGE_MASK;
@@ -253,7 +254,8 @@
 
 	xen_mc_batch();
 
-	u.ptr = virt_to_machine(ptr).maddr;
+	/* ptr may be ioremapped for 64-bit pagetable setup */
+	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
 	u.val = pmd_val_ma(val);
 	extend_mmu_update(&u);
 
@@ -415,7 +417,8 @@
 
 	xen_mc_batch();
 
-	u.ptr = virt_to_machine(ptr).maddr;
+	/* ptr may be ioremapped for 64-bit pagetable setup */
+	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
 	u.val = pud_val_ma(val);
 	extend_mmu_update(&u);
 
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -158,7 +158,7 @@
 #define pgd_val_ma(x)	((x).pgd)
 
 
-xmaddr_t arbitrary_virt_to_machine(unsigned long address);
+xmaddr_t arbitrary_virt_to_machine(void *address);
 void make_lowmem_page_readonly(void *vaddr);
 void make_lowmem_page_readwrite(void *vaddr);
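
The reason a pagetable walk is needed at all: virt_to_machine() is
only valid for addresses in the kernel's linear mapping, where __pa()
is simple arithmetic.  A fixmap/ioremap address has no such
relationship, so the pte must be looked up to find the mfn, roughly
(a sketch of what arbitrary_virt_to_machine does):

	unsigned int level;
	unsigned long address = (unsigned long)vaddr;
	pte_t *pte = lookup_address(address, &level);	/* walk the tables */
	phys_addr_t maddr = ((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT)
			    + (address & ~PAGE_MASK);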
 



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 34 of 55] xen: set num_processors
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (32 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 33 of 55] xen64: use arbitrary_virt_to_machine for xen_set_pmd Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 35 of 55] xen64: defer setting pagetable alloc/release ops Jeremy Fitzhardinge
                   ` (21 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Someone's got to do it.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/smp.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -155,8 +155,10 @@
 
 	for (i = 0; i < NR_CPUS; i++) {
 		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
-		if (rc >= 0)
+		if (rc >= 0) {
+			num_processors++;
 			cpu_set(i, cpu_possible_map);
+		}
 	}
 }
 



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 35 of 55] xen64: defer setting pagetable alloc/release ops
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (33 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 34 of 55] xen: set num_processors Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 36 of 55] xen: use set_pte_vaddr Jeremy Fitzhardinge
                   ` (20 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

We need to wait until the page structure is available to use the
proper pagetable page alloc/release operations, since they use struct
page to determine if a pagetable is pinned.

This happened to work in 32-bit because nobody allocated new pagetable
pages in the interim between xen_pagetable_setup_done and
xen_post_allocator_init, but the 64-bit kernel needs to allocate more
pagetable levels.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |   25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -878,6 +878,18 @@
 
 static __init void xen_pagetable_setup_done(pgd_t *base)
 {
+	xen_setup_shared_info();
+}
+
+static __init void xen_post_allocator_init(void)
+{
+	pv_mmu_ops.set_pte = xen_set_pte;
+	pv_mmu_ops.set_pmd = xen_set_pmd;
+	pv_mmu_ops.set_pud = xen_set_pud;
+#if PAGETABLE_LEVELS == 4
+	pv_mmu_ops.set_pgd = xen_set_pgd;
+#endif
+
 	/* This will work as long as patching hasn't happened yet
 	   (which it hasn't) */
 	pv_mmu_ops.alloc_pte = xen_alloc_pte;
@@ -887,19 +899,6 @@
 #if PAGETABLE_LEVELS == 4
 	pv_mmu_ops.alloc_pud = xen_alloc_pud;
 	pv_mmu_ops.release_pud = xen_release_pud;
-#endif
-
-	pv_mmu_ops.set_pte = xen_set_pte;
-
-	xen_setup_shared_info();
-}
-
-static __init void xen_post_allocator_init(void)
-{
-	pv_mmu_ops.set_pmd = xen_set_pmd;
-	pv_mmu_ops.set_pud = xen_set_pud;
-#if PAGETABLE_LEVELS == 4
-	pv_mmu_ops.set_pgd = xen_set_pgd;
 #endif
 
 	xen_mark_init_mm_pinned();
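
The struct page dependency mentioned in the commit message: the real
alloc hooks decide whether a freshly allocated pagetable page must be
made read-only by consulting the pinned state recorded in the owning
pagetable's struct page, along these lines (a sketch following the
existing pinning code, not a verbatim excerpt):

	/* can't run before mem_map/struct page exist */
	if (PagePinned(virt_to_page(mm->pgd)))
		make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));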



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 36 of 55] xen: use set_pte_vaddr
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (34 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 35 of 55] xen64: defer setting pagetable alloc/release ops Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:06 ` [PATCH 37 of 55] xen64: xen_write_idt_entry() and cvt_gate_to_trap() Jeremy Fitzhardinge
                   ` (19 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Make Xen's set_pte_mfn() use set_pte_vaddr rather than copying it.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
 arch/x86/xen/mmu.c |   30 +-----------------------------
 1 file changed, 1 insertion(+), 29 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -282,35 +282,7 @@
  */
 void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-
-	pgd = swapper_pg_dir + pgd_index(vaddr);
-	if (pgd_none(*pgd)) {
-		BUG();
-		return;
-	}
-	pud = pud_offset(pgd, vaddr);
-	if (pud_none(*pud)) {
-		BUG();
-		return;
-	}
-	pmd = pmd_offset(pud, vaddr);
-	if (pmd_none(*pmd)) {
-		BUG();
-		return;
-	}
-	pte = pte_offset_kernel(pmd, vaddr);
-	/* <mfn,flags> stored as-is, to permit clearing entries */
-	xen_set_pte(pte, mfn_pte(mfn, flags));
-
-	/*
-	 * It's enough to flush this one mapping.
-	 * (PGE mappings get flushed as well)
-	 */
-	__flush_tlb_one(vaddr);
+	set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
 }
 
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 37 of 55] xen64: xen_write_idt_entry() and cvt_gate_to_trap()
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (35 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 36 of 55] xen: use set_pte_vaddr Jeremy Fitzhardinge
@ 2008-07-08 22:06 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 38 of 55] xen64: deal with extra words Xen pushes onto exception frames Jeremy Fitzhardinge
                   ` (18 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

From: Eduardo Habkost <ehabkost@Rawhide-64.localdomain>

Changed to use the (to-be-)unified descriptor structs.

Signed-off-by: Eduardo Habkost <ehabkost@Rawhide-64.localdomain>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |   26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -401,23 +401,18 @@
 	preempt_enable();
 }
 
-static int cvt_gate_to_trap(int vector, u32 low, u32 high,
+static int cvt_gate_to_trap(int vector, const gate_desc *val,
 			    struct trap_info *info)
 {
-	u8 type, dpl;
-
-	type = (high >> 8) & 0x1f;
-	dpl = (high >> 13) & 3;
-
-	if (type != 0xf && type != 0xe)
+	if (val->type != 0xf && val->type != 0xe)
 		return 0;
 
 	info->vector = vector;
-	info->address = (high & 0xffff0000) | (low & 0x0000ffff);
-	info->cs = low >> 16;
-	info->flags = dpl;
+	info->address = gate_offset(*val);
+	info->cs = gate_segment(*val);
+	info->flags = val->dpl;
 	/* interrupt gates clear IF */
-	if (type == 0xe)
+	if (val->type == 0xe)
 		info->flags |= 4;
 
 	return 1;
@@ -444,11 +439,10 @@
 
 	if (p >= start && (p + 8) <= end) {
 		struct trap_info info[2];
-		u32 *desc = (u32 *)g;
 
 		info[1].address = 0;
 
-		if (cvt_gate_to_trap(entrynum, desc[0], desc[1], &info[0]))
+		if (cvt_gate_to_trap(entrynum, g, &info[0]))
 			if (HYPERVISOR_set_trap_table(info))
 				BUG();
 	}
@@ -461,13 +455,13 @@
 {
 	unsigned in, out, count;
 
-	count = (desc->size+1) / 8;
+	count = (desc->size+1) / sizeof(gate_desc);
 	BUG_ON(count > 256);
 
 	for (in = out = 0; in < count; in++) {
-		const u32 *entry = (u32 *)(desc->address + in * 8);
+		gate_desc *entry = (gate_desc*)(desc->address) + in;
 
-		if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out]))
+		if (cvt_gate_to_trap(in, entry, &traps[out]))
 			out++;
 	}
 	traps[out].address = 0;
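
For reference, the gate layout the removed open-coded version was
decoding (a sketch of the 32-bit IDT gate format, which the gate_desc
accessors now hide):

	/* desc points at the two raw words of a 32-bit gate */
	u32 low = desc[0], high = desc[1];
	u8  type = (high >> 8) & 0x1f;	/* 0xe intr gate, 0xf trap gate */
	u8  dpl  = (high >> 13) & 3;
	u32 addr = (high & 0xffff0000) | (low & 0x0000ffff);
	u16 cs   = low >> 16;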



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 38 of 55] xen64: deal with extra words Xen pushes onto exception frames
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (36 preceding siblings ...)
  2008-07-08 22:06 ` [PATCH 37 of 55] xen64: xen_write_idt_entry() and cvt_gate_to_trap() Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 39 of 55] xen64: add pvop for swapgs Jeremy Fitzhardinge
                   ` (17 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Xen pushes two extra words containing the values of rcx and r11.  This
pvop hook copies the words back into their appropriate registers, and
cleans them off the stack.  This leaves the stack in native form, so
the normal handler can run unchanged.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c  |    2 +-
 arch/x86/xen/xen-asm_64.S |    5 +++++
 arch/x86/xen/xen-ops.h    |    2 ++
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1091,7 +1091,7 @@
 	.safe_halt = xen_safe_halt,
 	.halt = xen_halt,
 #ifdef CONFIG_X86_64
-	.adjust_exception_frame = paravirt_nop,
+	.adjust_exception_frame = xen_adjust_exception_frame,
 #endif
 };
 
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -133,6 +133,11 @@
 	ret
 #endif
 
+ENTRY(xen_adjust_exception_frame)
+	mov 8+0(%rsp),%rcx
+	mov 8+8(%rsp),%r11
+	ret $16
+
 ENTRY(xen_iret)
 	pushq $0
 	jmp hypercall_page + __HYPERVISOR_iret * 32
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -67,7 +67,9 @@
 DECL_ASM(unsigned long, xen_save_fl_direct, void);
 DECL_ASM(void, xen_restore_fl_direct, unsigned long);
 
+/* These are not functions, and cannot be called normally */
 void xen_iret(void);
 void xen_sysexit(void);
+void xen_adjust_exception_frame(void);
 
 #endif /* XEN_OPS_H */
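
The stack as xen_adjust_exception_frame sees it (a sketch following
the description above; the return address comes from the call in the
exception entry stub):

	rsp+16:	r11	}  the two extra words Xen pushed
	rsp+8:	rcx	}
	rsp ->	return address into the exception entry code

The two movs restore %rcx and %r11, and "ret $16" returns while
discarding the 16 bytes of Xen-pushed state, leaving a native-format
exception frame for the unchanged handler.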



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 39 of 55] xen64: add pvop for swapgs
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (37 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 38 of 55] xen64: deal with extra words Xen pushes onto exception frames Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 40 of 55] xen64: register callbacks in arch-independent way Jeremy Fitzhardinge
                   ` (16 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

swapgs is a no-op under Xen, because the hypervisor makes sure the
right version of %gs is current when switching between user and kernel
modes.  This means that the swapgs "implementation" can be inlined and
used when the stack is unsafe (usermode).  Unfortunately, it means
that disabling patching will result in a non-booting kernel...

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |    3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1076,6 +1076,9 @@
 	.set_iopl_mask = xen_set_iopl_mask,
 	.io_delay = xen_io_delay,
 
+	/* Xen takes care of %gs when switching to usermode for us */
+	.swapgs = paravirt_nop,
+
 	.lazy_mode = {
 		.enter = paravirt_enter_lazy_cpu,
 		.leave = xen_leave_lazy,



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 40 of 55] xen64: register callbacks in arch-independent way
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (38 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 39 of 55] xen64: add pvop for swapgs Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 41 of 55] xen64: add identity irq->vector map Jeremy Fitzhardinge
                   ` (15 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Use the callback_op hypercall to register callbacks in a 32/64-bit
independent way (64-bit doesn't need a code segment, but that detail
is hidden in XEN_CALLBACK).

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/setup.c            |   27 +++++++++++++++++----------
 include/asm-x86/xen/hypercall.h |   12 ++++++++++++
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -91,19 +91,25 @@
 	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
 }
 
-void xen_enable_sysenter(void)
+static __cpuinit int register_callback(unsigned type, const void *func)
+{
+	struct callback_register callback = {
+		.type = type,
+		.address = XEN_CALLBACK(__KERNEL_CS, func),
+		.flags = CALLBACKF_mask_events,
+	};
+
+	return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
+}
+
+void __cpuinit xen_enable_sysenter(void)
 {
 	int cpu = smp_processor_id();
 	extern void xen_sysenter_target(void);
-	/* Mask events on entry, even though they get enabled immediately */
-	static struct callback_register sysenter = {
-		.type = CALLBACKTYPE_sysenter,
-		.address = XEN_CALLBACK(__KERNEL_CS, xen_sysenter_target),
-		.flags = CALLBACKF_mask_events,
-	};
 
 	if (!boot_cpu_has(X86_FEATURE_SEP) ||
-	    HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) {
+	    register_callback(CALLBACKTYPE_sysenter,
+			      xen_sysenter_target) != 0) {
 		clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
 	}
@@ -120,8 +126,9 @@
 	if (!xen_feature(XENFEAT_auto_translated_physmap))
 		HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3);
 
-	HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
-				 __KERNEL_CS, (unsigned long)xen_failsafe_callback);
+	if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
+	    register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
+		BUG();
 
 	xen_enable_sysenter();
 
diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -226,6 +226,7 @@
 	return _hypercall2(int, stack_switch, ss, esp);
 }
 
+#ifdef CONFIG_X86_32
 static inline int
 HYPERVISOR_set_callbacks(unsigned long event_selector,
 			 unsigned long event_address,
@@ -236,6 +237,17 @@
 			   event_selector, event_address,
 			   failsafe_selector, failsafe_address);
 }
+#else  /* CONFIG_X86_64 */
+static inline int
+HYPERVISOR_set_callbacks(unsigned long event_address,
+			unsigned long failsafe_address,
+			unsigned long syscall_address)
+{
+	return _hypercall3(int, set_callbacks,
+			   event_address, failsafe_address,
+			   syscall_address);
+}
+#endif  /* CONFIG_X86_{32,64} */
 
 static inline int
 HYPERVISOR_callback_op(int cmd, void *arg)



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 41 of 55] xen64: add identity irq->vector map
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (39 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 40 of 55] xen64: register callbacks in arch-independent way Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 42 of 55] Xen64: HYPERVISOR_set_segment_base() implementation Jeremy Fitzhardinge
                   ` (14 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

The x86_64 interrupt subsystem is oriented towards vectors, as opposed
to a flat irq space as it is in x86-32.  This patch adds a simple
identity irq->vector mapping so that we can continue to feed irqs into
do_IRQ() and get a good result.

Ideally x86_32 will unify with the 64-bit code and use vectors too.
At that point we can move to mapping event channels to vectors, which
will allow us to economise on irqs (so per-cpu event channels can
share irqs, rather than having to allocate one per cpu, for example).
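
As a sketch of why the identity map suffices: the 64-bit interrupt
path resolves a vector back to an irq through the per-cpu vector_irq
table, roughly:

	/* in do_IRQ(), simplified */
	irq = __get_cpu_var(vector_irq)[vector];
	/* with the identity map above, irq == vector */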

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |   19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1085,8 +1085,25 @@
 	},
 };
 
+static void __init __xen_init_IRQ(void)
+{
+#ifdef CONFIG_X86_64
+	int i;
+
+	/* Create identity vector->irq map */
+	for (i = 0; i < NR_VECTORS; i++) {
+		int cpu;
+
+		for_each_possible_cpu(cpu)
+			per_cpu(vector_irq, cpu)[i] = i;
+	}
+#endif	/* CONFIG_X86_64 */
+
+	xen_init_IRQ();
+}
+
 static const struct pv_irq_ops xen_irq_ops __initdata = {
-	.init_IRQ = xen_init_IRQ,
+	.init_IRQ = __xen_init_IRQ,
 	.save_fl = xen_save_fl,
 	.restore_fl = xen_restore_fl,
 	.irq_disable = xen_irq_disable,



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 42 of 55] Xen64: HYPERVISOR_set_segment_base() implementation
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (40 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 41 of 55] xen64: add identity irq->vector map Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 43 of 55] xen64: implement xen_load_gs_index() Jeremy Fitzhardinge
                   ` (13 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

From: Eduardo Habkost <ehabkost@redhat.com>

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 include/asm-x86/xen/hypercall.h |    8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -388,6 +388,14 @@
 	return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
 }
 
+#ifdef CONFIG_X86_64
+static inline int
+HYPERVISOR_set_segment_base(int reg, unsigned long value)
+{
+	return _hypercall2(int, set_segment_base, reg, value);
+}
+#endif
+
 static inline int
 HYPERVISOR_suspend(unsigned long srec)
 {



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 43 of 55] xen64: implement xen_load_gs_index()
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (41 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 42 of 55] Xen64: HYPERVISOR_set_segment_base() implementation Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 44 of 55] xen: rework pgd_walk to deal with 32/64 bit Jeremy Fitzhardinge
                   ` (12 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

From: Eduardo Habkost <ehabkost@redhat.com>

xen-64: implement xen_load_gs_index()

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |   11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -384,6 +384,14 @@
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
 		loadsegment(gs, 0);
 }
+
+#ifdef CONFIG_X86_64
+static void xen_load_gs_index(unsigned int idx)
+{
+	if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
+		BUG();
+}
+#endif
 
 static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 				const void *ptr)
@@ -1063,6 +1071,9 @@
 	.load_gdt = xen_load_gdt,
 	.load_idt = xen_load_idt,
 	.load_tls = xen_load_tls,
+#ifdef CONFIG_X86_64
+	.load_gs_index = xen_load_gs_index,
+#endif
 
 	.store_gdt = native_store_gdt,
 	.store_idt = native_store_idt,



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 44 of 55] xen: rework pgd_walk to deal with 32/64 bit
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (42 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 43 of 55] xen64: implement xen_load_gs_index() Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 45 of 55] xen: make sure the kernel command line is right Jeremy Fitzhardinge
                   ` (11 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Rewrite pgd_walk to deal with 64-bit address spaces.  There are two
notable features of 64-bit address spaces:

 1. The virtual address is only 48 bits wide, with the upper 16 bits
    being sign extension; kernel addresses are negative, and userspace is
    positive.

 2. The Xen hypervisor mapping is at the negative-most address, just above
    the sign-extension hole.

1. means that we can't easily use addresses when traversing the space,
since we must deal with sign extension.  This rewrite expresses
everything in terms of pgd/pud/pmd indices, which means we don't need
to worry about the exact configuration of the virtual memory space.
This approach works equally well in 32-bit.

To deal with 2, assume the hole is between the uppermost userspace
address and PAGE_OFFSET.  For 64-bit this skips the Xen mapping hole.
For 32-bit, the hole is zero-sized.

In all cases, the uppermost kernel address is FIXADDR_TOP.
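
For concreteness: with 4-level pagetables each pgd entry covers 2^39
bytes, and pgd_index(va) = (va >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1),
so for the standard 48-bit layout:

	pgd_index(0x00007fffffffffff) == 255	/* top of userspace */
	pgd_index(0xffff800000000000) == 256	/* bottom of the kernel half */

The walk simply skips pgd indices that fall between the two hole
boundaries; on 32-bit the boundaries coincide and nothing is skipped.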

A side-effect of this patch is that the upper boundary is actually
handled properly, exposing a long-standing bug in 32-bit, which failed
to pin kernel pmd page.  The kernel pmd is not shared, and so must be
explicitly pinned, even though the kernel ptes are shared and don't
need pinning.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/mmu.c |  117 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 76 insertions(+), 41 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -44,6 +44,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
+#include <asm/fixmap.h>
 #include <asm/mmu_context.h>
 #include <asm/paravirt.h>
 #include <asm/linkage.h>
@@ -491,77 +492,103 @@
 #endif	/* PAGETABLE_LEVELS == 4 */
 
 /*
-  (Yet another) pagetable walker.  This one is intended for pinning a
-  pagetable.  This means that it walks a pagetable and calls the
-  callback function on each page it finds making up the page table,
-  at every level.  It walks the entire pagetable, but it only bothers
-  pinning pte pages which are below pte_limit.  In the normal case
-  this will be TASK_SIZE, but at boot we need to pin up to
-  FIXADDR_TOP.  But the important bit is that we don't pin beyond
-  there, because then we start getting into Xen's ptes.
-*/
-static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level),
+ * (Yet another) pagetable walker.  This one is intended for pinning a
+ * pagetable.  This means that it walks a pagetable and calls the
+ * callback function on each page it finds making up the page table,
+ * at every level.  It walks the entire pagetable, but it only bothers
+ * pinning pte pages which are below limit.  In the normal case this
+ * will be STACK_TOP_MAX, but at boot we need to pin up to
+ * FIXADDR_TOP.
+ *
+ * For 32-bit the important bit is that we don't pin beyond there,
+ * because then we start getting into Xen's ptes.
+ *
+ * For 64-bit, we must skip the Xen hole in the middle of the address
+ * space, just after the big x86-64 virtual hole.
+ */
+static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 		    unsigned long limit)
 {
-	pgd_t *pgd = pgd_base;
 	int flush = 0;
-	unsigned long addr = 0;
-	unsigned long pgd_next;
+	unsigned hole_low, hole_high;
+	unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
+	unsigned pgdidx, pudidx, pmdidx;
 
-	BUG_ON(limit > FIXADDR_TOP);
+	/* The limit is the last byte to be touched */
+	limit--;
+	BUG_ON(limit >= FIXADDR_TOP);
 
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return 0;
 
-	for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) {
+	/*
+	 * 64-bit has a great big hole in the middle of the address
+	 * space, which contains the Xen mappings.  On 32-bit these
+	 * will end up making a zero-sized hole and so is a no-op.
+	 */
+	hole_low = pgd_index(STACK_TOP_MAX + PGDIR_SIZE - 1);
+	hole_high = pgd_index(PAGE_OFFSET);
+
+	pgdidx_limit = pgd_index(limit);
+#if PTRS_PER_PUD > 1
+	pudidx_limit = pud_index(limit);
+#else
+	pudidx_limit = 0;
+#endif
+#if PTRS_PER_PMD > 1
+	pmdidx_limit = pmd_index(limit);
+#else
+	pmdidx_limit = 0;
+#endif
+
+	flush |= (*func)(virt_to_page(pgd), PT_PGD);
+
+	for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
 		pud_t *pud;
-		unsigned long pud_limit, pud_next;
 
-		pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP);
-
-		if (!pgd_val(*pgd))
+		if (pgdidx >= hole_low && pgdidx < hole_high)
 			continue;
 
-		pud = pud_offset(pgd, 0);
+		if (!pgd_val(pgd[pgdidx]))
+			continue;
+
+		pud = pud_offset(&pgd[pgdidx], 0);
 
 		if (PTRS_PER_PUD > 1) /* not folded */
 			flush |= (*func)(virt_to_page(pud), PT_PUD);
 
-		for (; addr != pud_limit; pud++, addr = pud_next) {
+		for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
 			pmd_t *pmd;
-			unsigned long pmd_limit;
 
-			pud_next = pud_addr_end(addr, pud_limit);
+			if (pgdidx == pgdidx_limit &&
+			    pudidx > pudidx_limit)
+				goto out;
 
-			if (pud_next < limit)
-				pmd_limit = pud_next;
-			else
-				pmd_limit = limit;
-
-			if (pud_none(*pud))
+			if (pud_none(pud[pudidx]))
 				continue;
 
-			pmd = pmd_offset(pud, 0);
+			pmd = pmd_offset(&pud[pudidx], 0);
 
 			if (PTRS_PER_PMD > 1) /* not folded */
 				flush |= (*func)(virt_to_page(pmd), PT_PMD);
 
-			for (; addr != pmd_limit; pmd++) {
-				addr += (PAGE_SIZE * PTRS_PER_PTE);
-				if ((pmd_limit-1) < (addr-1)) {
-					addr = pmd_limit;
-					break;
-				}
+			for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
+				struct page *pte;
 
-				if (pmd_none(*pmd))
+				if (pgdidx == pgdidx_limit &&
+				    pudidx == pudidx_limit &&
+				    pmdidx > pmdidx_limit)
+					goto out;
+
+				if (pmd_none(pmd[pmdidx]))
 					continue;
 
-				flush |= (*func)(pmd_page(*pmd), PT_PTE);
+				pte = pmd_page(pmd[pmdidx]);
+				flush |= (*func)(pte, PT_PTE);
 			}
 		}
 	}
-
-	flush |= (*func)(virt_to_page(pgd_base), PT_PGD);
+out:
 
 	return flush;
 }
@@ -650,6 +677,11 @@
 		xen_mc_batch();
 	}
 
+#ifdef CONFIG_X86_PAE
+	/* Need to make sure unshared kernel PMD is pinnable */
+	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+#endif
+
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
 	xen_mc_issue(0);
 }
@@ -731,6 +763,10 @@
 
 	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
+#ifdef CONFIG_X86_PAE
+	/* Need to make sure unshared kernel PMD is unpinned */
+	unpin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+#endif
 	pgd_walk(pgd, unpin_page, TASK_SIZE);
 
 	xen_mc_issue(0);
@@ -750,7 +786,6 @@
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (PageSavePinned(page)) {
 			BUG_ON(!PagePinned(page));
-			printk("unpinning pinned %p\n", page_address(page));
 			xen_pgd_unpin((pgd_t *)page_address(page));
 			ClearPageSavePinned(page);
 		}



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 45 of 55] xen: make sure the kernel command line is right
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (43 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 44 of 55] xen: rework pgd_walk to deal with 32/64 bit Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 46 of 55] xen: enable PM_SLEEP for CONFIG_XEN Jeremy Fitzhardinge
                   ` (10 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Point the boot params cmd_line_ptr to the domain-builder-provided
command line.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1587,6 +1587,7 @@
 	boot_params.hdr.ramdisk_image = xen_start_info->mod_start
 		? __pa(xen_start_info->mod_start) : 0;
 	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
+	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
 
 	if (!is_initial_xendomain()) {
 		add_preferred_console("xenboot", 0, NULL);



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 46 of 55] xen: enable PM_SLEEP for CONFIG_XEN
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (44 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 45 of 55] xen: make sure the kernel command line is right Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 47 of 55] xen64: implement failsafe callback Jeremy Fitzhardinge
                   ` (9 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Xen save/restore requires PM_SLEEP to be set without requiring
SUSPEND or HIBERNATION.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 kernel/power/Kconfig |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -82,7 +82,7 @@
 
 config PM_SLEEP
 	bool
-	depends on SUSPEND || HIBERNATION
+	depends on SUSPEND || HIBERNATION || XEN
 	default y
 
 config SUSPEND



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 47 of 55] xen64: implement failsafe callback
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (45 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 46 of 55] xen: enable PM_SLEEP for CONFIG_XEN Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 48 of 55] xen64: Clear %fs on xen_load_tls() Jeremy Fitzhardinge
                   ` (8 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Implement the failsafe callback, so that iret and segment register
load exceptions are reported to the kernel.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/kernel/entry_64.S |   16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1365,10 +1365,8 @@
 # with its current contents: any discrepancy means we in category 1.
 */
 ENTRY(xen_failsafe_callback)
-#if 1
-	ud2a
-#else
-	_frame (RIP-0x30)
+	framesz = (RIP-0x30)	/* workaround buggy gas */
+	_frame framesz
 	CFI_REL_OFFSET rcx, 0
 	CFI_REL_OFFSET r11, 8
 	movw %ds,%cx
@@ -1391,8 +1389,13 @@
 	CFI_RESTORE r11
 	addq $0x30,%rsp
 	CFI_ADJUST_CFA_OFFSET -0x30
-	movq $11,%rdi	/* SIGSEGV */
-	jmp do_exit
+	pushq $0
+	CFI_ADJUST_CFA_OFFSET 8
+	pushq %r11
+	CFI_ADJUST_CFA_OFFSET 8
+	pushq %rcx
+	CFI_ADJUST_CFA_OFFSET 8
+	jmp general_protection
 	CFI_RESTORE_STATE
 1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
 	movq (%rsp),%rcx
@@ -1406,7 +1409,6 @@
 	SAVE_ALL
 	jmp error_exit
 	CFI_ENDPROC
-#endif
 END(xen_failsafe_callback)
 
 #endif /* CONFIG_XEN */



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 48 of 55] xen64: Clear %fs on xen_load_tls()
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (46 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 47 of 55] xen64: implement failsafe callback Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 49 of 55] xen64: implement 64-bit update_descriptor Jeremy Fitzhardinge
                   ` (7 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

From: Eduardo Habkost <ehabkost@redhat.com>

We need to do this, otherwise we can get a GPF on hypercall return
after the TLS descriptor is cleared but %fs is still pointing to it.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |   31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -364,14 +364,6 @@
 
 static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 {
-	xen_mc_batch();
-
-	load_TLS_descriptor(t, cpu, 0);
-	load_TLS_descriptor(t, cpu, 1);
-	load_TLS_descriptor(t, cpu, 2);
-
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
-
 	/*
 	 * XXX sleazy hack: If we're being called in a lazy-cpu zone,
 	 * it means we're in a context switch, and %gs has just been
@@ -380,9 +372,30 @@
 	 * Either way, it has been saved, and the new value will get
 	 * loaded properly.  This will go away as soon as Xen has been
 	 * modified to not save/restore %gs for normal hypercalls.
+	 *
+	 * On x86_64, this hack is not used for %gs, because gs points
+	 * to KERNEL_GS_BASE (and uses it for PDA references), so we
+	 * must not zero %gs on x86_64
+	 *
+	 * For x86_64, we need to zero %fs, otherwise we may get an
+	 * exception between the new %fs descriptor being loaded and
+	 * %fs being effectively cleared at __switch_to().
 	 */
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
+#ifdef CONFIG_X86_32
 		loadsegment(gs, 0);
+#else
+		loadsegment(fs, 0);
+#endif
+	}
+
+	xen_mc_batch();
+
+	load_TLS_descriptor(t, cpu, 0);
+	load_TLS_descriptor(t, cpu, 1);
+	load_TLS_descriptor(t, cpu, 2);
+
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
 
 #ifdef CONFIG_X86_64



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 49 of 55] xen64: implement 64-bit update_descriptor
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (47 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 48 of 55] xen64: Clear %fs on xen_load_tls() Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 50 of 55] xen64: save lots of registers Jeremy Fitzhardinge
                   ` (6 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

The 64-bit hypercall interface can pass a maddr in one argument rather
than splitting it.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 include/asm-x86/xen/hypercall.h |   13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -466,10 +466,15 @@
 			struct desc_struct desc)
 {
 	mcl->op = __HYPERVISOR_update_descriptor;
-	mcl->args[0] = maddr;
-	mcl->args[1] = maddr >> 32;
-	mcl->args[2] = desc.a;
-	mcl->args[3] = desc.b;
+	if (sizeof(maddr) == sizeof(long)) {
+		mcl->args[0] = maddr;
+		mcl->args[1] = *(unsigned long *)&desc;
+	} else {
+		mcl->args[0] = maddr;
+		mcl->args[1] = maddr >> 32;
+		mcl->args[2] = desc.a;
+		mcl->args[3] = desc.b;
+	}
 }
 
 static inline void



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 50 of 55] xen64: save lots of registers
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (48 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 49 of 55] xen64: implement 64-bit update_descriptor Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 51 of 55] xen64: allocate and manage user pagetables Jeremy Fitzhardinge
                   ` (5 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

The Xen hypercall interface is allowed to trash any or all of the
argument registers, so we need to be careful that the kernel state
isn't damaged.  On 32-bit kernels, the hypercall parameter registers
same as a regparm function call, so we've got away without explicit
clobbering so far.  The 64-bit ABI defines lots of caller-save
registers, so save them all for safety.  We can trim this set later by
re-distributing the responsibility for saving all these registers.
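
For reference, the caller-saved (volatile) integer registers in the
x86-64 SysV ABI are exactly the set the new macro pushes:

	%rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, %r10, %r11

The callee-saved %rbx, %rbp and %r12-%r15 must already be preserved
across the hypercall, so they need no explicit handling here.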

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 include/asm-x86/paravirt.h |   26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1396,8 +1396,8 @@
  * caller saved registers but the argument parameter */
 #define PV_SAVE_REGS "pushq %%rdi;"
 #define PV_RESTORE_REGS "popq %%rdi;"
-#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx"
-#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx"
+#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi"
+#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi"
 #define PV_FLAGS_ARG "D"
 #endif
 
@@ -1489,8 +1489,26 @@
 
 
 #ifdef CONFIG_X86_64
-#define PV_SAVE_REGS   pushq %rax; pushq %rdi; pushq %rcx; pushq %rdx
-#define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax
+#define PV_SAVE_REGS				\
+	push %rax;				\
+	push %rcx;				\
+	push %rdx;				\
+	push %rsi;				\
+	push %rdi;				\
+	push %r8;				\
+	push %r9;				\
+	push %r10;				\
+	push %r11
+#define PV_RESTORE_REGS				\
+	pop %r11;				\
+	pop %r10;				\
+	pop %r9;				\
+	pop %r8;				\
+	pop %rdi;				\
+	pop %rsi;				\
+	pop %rdx;				\
+	pop %rcx;				\
+	pop %rax
 #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
 #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
 #define PARA_INDIRECT(addr)	*addr(%rip)



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 51 of 55] xen64: allocate and manage user pagetables
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (49 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 50 of 55] xen64: save lots of registers Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 52 of 55] xen64: set up syscall and sysenter entrypoints for 64-bit Jeremy Fitzhardinge
                   ` (4 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Because the x86_64 architecture does not enforce segment limits, Xen
cannot protect itself with them as it does in 32-bit mode.  Therefore,
to protect itself, it runs the guest kernel in ring 3.  Since it also
runs the guest userspace in ring 3, the guest kernel must maintain a
second pagetable for its userspace, which does not map kernel space.
Naturally, the guest kernel pagetables map both kernel and userspace.

The userspace pagetable is attached to the corresponding kernel
pagetable via the pgd's page->private field.  It is allocated and
freed at the same time as the kernel pgd via the
paravirt_pgd_alloc/free hooks.

Fortunately, the user pagetable is almost entirely shared with the
kernel pagetable; the only difference is the pgd page itself.  set_pgd
will populate all entries in the kernel pagetable, and also set the
corresponding user pgd entry if the address is less than
STACK_TOP_MAX.

The user pagetable must be pinned and unpinned with the kernel one,
but because the pagetables are aliased, pgd_walk() only needs to be
called on the kernel pagetable.  The user pgd page is then
pinned/unpinned along with the kernel pgd page.

xen_write_cr3 must write both the kernel and user cr3s.

The init_mm.pgd pagetable never has a user pagetable allocated for it,
because it is never used while running in usermode.

One awkward area is that early in boot the page structures are not
available.  No user pagetable can exist at that point, but it
complicates the logic to avoid looking at the page structure.
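
A minimal usage sketch of the linkage (hypothetical caller;
xen_get_user_pgd() itself is added in the mmu.c hunk below):

	pgd_t *kernel_pgd = mm->pgd;
	pgd_t *user_pgd = xen_get_user_pgd(kernel_pgd);
	/* NULL for init_mm, and early in boot before the user
	   pagetable has been allocated */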

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |   99 +++++++++++++++++++++++++++++++++++++++-------
 arch/x86/xen/mmu.c       |   91 ++++++++++++++++++++++++++++++++++++++----
 arch/x86/xen/mmu.h       |    2
 3 files changed, 168 insertions(+), 24 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -46,7 +46,6 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/reboot.h>
-#include <asm/pgalloc.h>
 
 #include "xen-ops.h"
 #include "mmu.h"
@@ -711,29 +710,57 @@
 	x86_write_percpu(xen_current_cr3, (unsigned long)v);
 }
 
-static void xen_write_cr3(unsigned long cr3)
+static void __xen_write_cr3(bool kernel, unsigned long cr3)
 {
 	struct mmuext_op *op;
 	struct multicall_space mcs;
-	unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
+	unsigned long mfn;
 
+	if (cr3)
+		mfn = pfn_to_mfn(PFN_DOWN(cr3));
+	else
+		mfn = 0;
+
+	WARN_ON(mfn == 0 && kernel);
+
+	mcs = __xen_mc_entry(sizeof(*op));
+
+	op = mcs.args;
+	op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
+	op->arg1.mfn = mfn;
+
+	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+
+	if (kernel) {
+		x86_write_percpu(xen_cr3, cr3);
+
+		/* Update xen_current_cr3 once the batch has actually
+		   been submitted. */
+		xen_mc_callback(set_current_cr3, (void *)cr3);
+	}
+}
+
+static void xen_write_cr3(unsigned long cr3)
+{
 	BUG_ON(preemptible());
 
-	mcs = xen_mc_entry(sizeof(*op));  /* disables interrupts */
+	xen_mc_batch();  /* disables interrupts */
 
 	/* Update while interrupts are disabled, so its atomic with
 	   respect to ipis */
 	x86_write_percpu(xen_cr3, cr3);
 
-	op = mcs.args;
-	op->cmd = MMUEXT_NEW_BASEPTR;
-	op->arg1.mfn = mfn;
+	__xen_write_cr3(true, cr3);
 
-	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
-	/* Update xen_update_cr3 once the batch has actually
-	   been submitted. */
-	xen_mc_callback(set_current_cr3, (void *)cr3);
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
+		if (user_pgd)
+			__xen_write_cr3(false, __pa(user_pgd));
+		else
+			__xen_write_cr3(false, 0);
+	}
+#endif
 
 	xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
 }
@@ -792,6 +819,40 @@
 static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PMD);
+}
+
+static int xen_pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd = mm->pgd;
+	int ret = 0;
+
+	BUG_ON(PagePinned(virt_to_page(pgd)));
+
+#ifdef CONFIG_X86_64
+	{
+		struct page *page = virt_to_page(pgd);
+
+		BUG_ON(page->private != 0);
+
+		page->private = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+		if (page->private == 0)
+			ret = -ENOMEM;
+
+		BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
+	}
+#endif
+
+	return ret;
+}
+
+static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+#ifdef CONFIG_X86_64
+	pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+	if (user_pgd)
+		free_page((unsigned long)user_pgd);
+#endif
 }
 
 /* This should never happen until we're OK to use struct page */
@@ -1168,8 +1229,8 @@
 	.pte_update = paravirt_nop,
 	.pte_update_defer = paravirt_nop,
 
-	.pgd_alloc = __paravirt_pgd_alloc,
-	.pgd_free = paravirt_nop,
+	.pgd_alloc = xen_pgd_alloc,
+	.pgd_free = xen_pgd_free,
 
 	.alloc_pte = xen_alloc_pte_init,
 	.release_pte = xen_release_pte_init,
@@ -1480,7 +1541,15 @@
 
 	/* Switch over */
 	pgd = init_level4_pgt;
-	xen_write_cr3(__pa(pgd));
+
+	/*
+	 * At this stage there can be no user pgd, and no page
+	 * structure to attach it to, so make sure we just set kernel
+	 * pgd.
+	 */
+	xen_mc_batch();
+	__xen_write_cr3(true, __pa(pgd));
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
 
 	reserve_early(__pa(xen_start_info->pt_base),
 		      __pa(xen_start_info->pt_base +
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -57,6 +57,13 @@
 
 #include "multicalls.h"
 #include "mmu.h"
+
+/*
+ * Just beyond the highest usermode address.  STACK_TOP_MAX has a
+ * redzone above it, so round it up to a PGD boundary.
+ */
+#define USER_LIMIT	((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
+
 
 #define P2M_ENTRIES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))
 #define TOP_ENTRIES		(MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
@@ -461,17 +468,45 @@
 	return native_make_pud(pud);
 }
 
-void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+pgd_t *xen_get_user_pgd(pgd_t *pgd)
+{
+	pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
+	unsigned offset = pgd - pgd_page;
+	pgd_t *user_ptr = NULL;
+
+	if (offset < pgd_index(USER_LIMIT)) {
+		struct page *page = virt_to_page(pgd_page);
+		user_ptr = (pgd_t *)page->private;
+		if (user_ptr)
+			user_ptr += offset;
+	}
+
+	return user_ptr;
+}
+
+static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
 {
 	struct mmu_update u;
 
+	u.ptr = virt_to_machine(ptr).maddr;
+	u.val = pgd_val_ma(val);
+	extend_mmu_update(&u);
+}
+
+/*
+ * Raw hypercall-based set_pgd, intended for in early boot before
+ * there's a page structure.  This implies:
+ *  1. The only existing pagetable is the kernel's
+ *  2. It is always pinned
+ *  3. It has no user pagetable attached to it
+ */
+void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
 	preempt_disable();
 
 	xen_mc_batch();
 
-	u.ptr = virt_to_machine(ptr).maddr;
-	u.val = pgd_val_ma(val);
-	extend_mmu_update(&u);
+	__xen_set_pgd_hyper(ptr, val);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
@@ -480,14 +515,28 @@
 
 void xen_set_pgd(pgd_t *ptr, pgd_t val)
 {
+	pgd_t *user_ptr = xen_get_user_pgd(ptr);
+
 	/* If page is not pinned, we can just update the entry
 	   directly */
 	if (!page_pinned(ptr)) {
 		*ptr = val;
+		if (user_ptr) {
+			WARN_ON(page_pinned(user_ptr));
+			*user_ptr = val;
+		}
 		return;
 	}
 
-	xen_set_pgd_hyper(ptr, val);
+	/* If it's pinned, then we can at least batch the kernel and
+	   user updates together. */
+	xen_mc_batch();
+
+	__xen_set_pgd_hyper(ptr, val);
+	if (user_ptr)
+		__xen_set_pgd_hyper(user_ptr, val);
+
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 #endif	/* PAGETABLE_LEVELS == 4 */
 
@@ -526,7 +575,7 @@
 	 * space, which contains the Xen mappings.  On 32-bit these
 	 * will end up making a zero-sized hole and so is a no-op.
 	 */
-	hole_low = pgd_index(STACK_TOP_MAX + PGDIR_SIZE - 1);
+	hole_low = pgd_index(USER_LIMIT);
 	hole_high = pgd_index(PAGE_OFFSET);
 
 	pgdidx_limit = pgd_index(limit);
@@ -670,19 +719,31 @@
 {
 	xen_mc_batch();
 
-	if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
+	if (pgd_walk(pgd, pin_page, USER_LIMIT)) {
 		/* re-enable interrupts for kmap_flush_unused */
 		xen_mc_issue(0);
 		kmap_flush_unused();
 		xen_mc_batch();
 	}
 
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+		xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
+
+		if (user_pgd) {
+			pin_page(virt_to_page(user_pgd), PT_PGD);
+			xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
+		}
+	}
+#else /* CONFIG_X86_32 */
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is pinnable */
 	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
 #endif
-
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
+#endif /* CONFIG_X86_64 */
 	xen_mc_issue(0);
 }
 
@@ -763,11 +824,23 @@
 
 	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+		if (user_pgd) {
+			xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
+			unpin_page(virt_to_page(user_pgd), PT_PGD);
+		}
+	}
+#endif
+
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is unpinned */
 	unpin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
 #endif
-	pgd_walk(pgd, unpin_page, TASK_SIZE);
+
+	pgd_walk(pgd, unpin_page, USER_LIMIT);
 
 	xen_mc_issue(0);
 }
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -51,6 +51,8 @@
 void xen_set_pgd_hyper(pgd_t *pgdp, pgd_t pgd);
 #endif
 
+pgd_t *xen_get_user_pgd(pgd_t *pgd);
+
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 void  xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 				  pte_t *ptep, pte_t pte);



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 52 of 55] xen64: set up syscall and sysenter entrypoints for 64-bit
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (50 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 51 of 55] xen64: allocate and manage user pagetables Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 53 of 55] xen64: set up userspace syscall patch Jeremy Fitzhardinge
                   ` (3 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

We set up entrypoints for syscall and sysenter.  sysenter is only used
for 32-bit compat processes, whereas syscall can be used by both 32
and 64-bit processes.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c  |    4 +
 arch/x86/xen/setup.c      |   42 +++++++++++++-
 arch/x86/xen/smp.c        |    1
 arch/x86/xen/xen-asm_64.S |  131 +++++++++++++++++++++++++++++++++++++++++++--
 arch/x86/xen/xen-ops.h    |    3 +
 5 files changed, 175 insertions(+), 6 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1139,6 +1139,10 @@
 
 	.iret = xen_iret,
 	.irq_enable_sysexit = xen_sysexit,
+#ifdef CONFIG_X86_64
+	.usergs_sysret32 = xen_sysret32,
+	.usergs_sysret64 = xen_sysret64,
+#endif
 
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -86,9 +86,11 @@
  */
 static void __init fiddle_vdso(void)
 {
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 	extern const char vdso32_default_start;
 	u32 *mask = VDSO32_SYMBOL(&vdso32_default_start, NOTE_MASK);
 	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
+#endif
 }
 
 static __cpuinit int register_callback(unsigned type, const void *func)
@@ -106,13 +108,46 @@
 {
 	int cpu = smp_processor_id();
 	extern void xen_sysenter_target(void);
+	int ret;
 
-	if (!boot_cpu_has(X86_FEATURE_SEP) ||
-	    register_callback(CALLBACKTYPE_sysenter,
-			      xen_sysenter_target) != 0) {
+#ifdef CONFIG_X86_32
+	if (!boot_cpu_has(X86_FEATURE_SEP)) {
+		return;
+	}
+#else
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
+	    boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR) {
+		return;
+	}
+#endif
+
+	ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
+	if (ret != 0) {
 		clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
 	}
+}
+
+void __cpuinit xen_enable_syscall(void)
+{
+#ifdef CONFIG_X86_64
+	int cpu = smp_processor_id();
+	int ret;
+	extern void xen_syscall_target(void);
+	extern void xen_syscall32_target(void);
+
+	ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
+	if (ret != 0) {
+		printk("failed to set syscall: %d\n", ret);
+		clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SYSCALL);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SYSCALL);
+	} else {
+		ret = register_callback(CALLBACKTYPE_syscall32,
+					xen_syscall32_target);
+		if (ret != 0)
+			printk("failed to set 32-bit syscall: %d\n", ret);
+	}
+#endif /* CONFIG_X86_64 */
 }
 
 void __init xen_arch_setup(void)
@@ -131,6 +166,7 @@
 		BUG();
 
 	xen_enable_sysenter();
+	xen_enable_syscall();
 
 	set_iopl.iopl = 1;
 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -69,6 +69,7 @@
 	preempt_disable();
 
 	xen_enable_sysenter();
+	xen_enable_syscall();
 
 	cpu = smp_processor_id();
 	smp_store_cpu_info(cpu);
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -15,6 +15,8 @@
 
 #include <asm/asm-offsets.h>
 #include <asm/processor-flags.h>
+#include <asm/errno.h>
+#include <asm/segment.h>
 
 #include <xen/interface/xen.h>
 
@@ -138,9 +140,132 @@
 	mov 8+8(%rsp),%r11
 	ret $16
 
+hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
+/*
+	Xen64 iret frame:
+
+	ss
+	rsp
+	rflags
+	cs
+	rip		<-- standard iret frame
+
+	flags
+
+	rcx		}
+	r11		}<-- pushed by hypercall page
+rsp ->	rax		}
+ */
 ENTRY(xen_iret)
 	pushq $0
-	jmp hypercall_page + __HYPERVISOR_iret * 32
+1:	jmp hypercall_iret
+ENDPATCH(xen_iret)
+RELOC(xen_iret, 1b+1)
 
+/*
+	sysexit is not used for 64-bit processes, so it's
+	only ever used to return to 32-bit compat userspace.
+ */
 ENTRY(xen_sysexit)
-	ud2a
+	pushq $__USER32_DS
+	pushq %rcx
+	pushq $X86_EFLAGS_IF
+	pushq $__USER32_CS
+	pushq %rdx
+
+	pushq $VGCF_in_syscall
+1:	jmp hypercall_iret
+ENDPATCH(xen_sysexit)
+RELOC(xen_sysexit, 1b+1)
+
+ENTRY(xen_sysret64)
+	/* We're already on the usermode stack at this point, but still
+	   with the kernel gs, so we can easily switch back */
+	movq %rsp, %gs:pda_oldrsp
+	movq %gs:pda_kernelstack,%rsp
+
+	pushq $__USER_DS
+	pushq %gs:pda_oldrsp
+	pushq %r11
+	pushq $__USER_CS
+	pushq %rcx
+
+	pushq $VGCF_in_syscall
+1:	jmp hypercall_iret
+ENDPATCH(xen_sysret64)
+RELOC(xen_sysret64, 1b+1)
+
+ENTRY(xen_sysret32)
+	/* We're already on the usermode stack at this point, but still
+	   with the kernel gs, so we can easily switch back */
+	movq %rsp, %gs:pda_oldrsp
+	movq %gs:pda_kernelstack, %rsp
+
+	pushq $__USER32_DS
+	pushq %gs:pda_oldrsp
+	pushq %r11
+	pushq $__USER32_CS
+	pushq %rcx
+
+	pushq $VGCF_in_syscall
+1:	jmp hypercall_iret
+ENDPATCH(xen_sysret32)
+RELOC(xen_sysret32, 1b+1)
+
+/*
+	Xen handles syscall callbacks much like ordinary exceptions,
+	which means we have:
+	 - kernel gs
+	 - kernel rsp
+	 - an iret-like stack frame on the stack (including rcx and r11):
+		ss
+		rsp
+		rflags
+		cs
+		rip
+		r11
+	rsp->	rcx
+
+	In all the entrypoints, we undo all that to make it look
+	like a CPU-generated syscall/sysenter and jump to the normal
+	entrypoint.
+ */
+
+.macro undo_xen_syscall
+	mov 0*8(%rsp),%rcx
+	mov 1*8(%rsp),%r11
+	mov 5*8(%rsp),%rsp
+.endm
+
+/* Normal 64-bit system call target */
+ENTRY(xen_syscall_target)
+	undo_xen_syscall
+	jmp system_call_after_swapgs
+ENDPROC(xen_syscall_target)
+
+#ifdef CONFIG_IA32_EMULATION
+
+/* 32-bit compat syscall target */
+ENTRY(xen_syscall32_target)
+	undo_xen_syscall
+	jmp ia32_cstar_target
+ENDPROC(xen_syscall32_target)
+
+/* 32-bit compat sysenter target */
+ENTRY(xen_sysenter_target)
+	undo_xen_syscall
+	jmp ia32_sysenter_target
+ENDPROC(xen_sysenter_target)
+
+#else /* !CONFIG_IA32_EMULATION */
+
+ENTRY(xen_syscall32_target)
+ENTRY(xen_sysenter_target)
+	lea 16(%rsp), %rsp	/* strip %rcx,%r11 */
+	mov $-ENOSYS, %rax
+	pushq $VGCF_in_syscall
+	jmp hypercall_iret
+ENDPROC(xen_syscall32_target)
+ENDPROC(xen_sysenter_target)
+
+#endif	/* CONFIG_IA32_EMULATION */
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -26,6 +26,7 @@
 void __init xen_arch_setup(void);
 void __init xen_init_IRQ(void);
 void xen_enable_sysenter(void);
+void xen_enable_syscall(void);
 void xen_vcpu_restore(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
@@ -70,6 +71,8 @@
 /* These are not functions, and cannot be called normally */
 void xen_iret(void);
 void xen_sysexit(void);
+void xen_sysret32(void);
+void xen_sysret64(void);
 void xen_adjust_exception_frame(void);
 
 #endif /* XEN_OPS_H */



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 53 of 55] xen64: set up userspace syscall patch
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (51 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 52 of 55] xen64: set up syscall and sysenter entrypoints for 64-bit Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 54 of 55] xen: implement Xen write_msr operation Jeremy Fitzhardinge
                   ` (2 subsequent siblings)
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

64-bit userspace expects the vsyscall page to be mapped at a specific fixed
address, which happens to be in the middle of the kernel address
space.  Because we have split user and kernel pagetables, we need to
make special arrangements for the vsyscall mapping to appear in the
kernel part of the user pagetable.
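
Concretely, the fixed mapping in question is the vsyscall page at
VSYSCALL_START (0xffffffffff600000), and

	pgd_index(0xffffffffff600000UL) == 511

so it lives in the very last pgd slot; that is why the user pagetable
gets its own level3_user_vsyscall pud wired into that slot below.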

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |   46 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 36 insertions(+), 10 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -55,6 +55,18 @@
 
 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
+
+/*
+ * Identity map, in addition to plain kernel map.  This needs to be
+ * large enough to allocate page table pages to allocate the rest.
+ * Each page can map 2MB.
+ */
+static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
+
+#ifdef CONFIG_X86_64
+/* l3 pud for userspace vsyscall mapping */
+static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
+#endif /* CONFIG_X86_64 */
 
 /*
  * Note about cr3 (pagetable base) values:
@@ -831,12 +843,20 @@
 #ifdef CONFIG_X86_64
 	{
 		struct page *page = virt_to_page(pgd);
+		pgd_t *user_pgd;
 
 		BUG_ON(page->private != 0);
 
-		page->private = __get_free_page(GFP_KERNEL | __GFP_ZERO);
-		if (page->private == 0)
-			ret = -ENOMEM;
+		ret = -ENOMEM;
+
+		user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+		page->private = (unsigned long)user_pgd;
+
+		if (user_pgd != NULL) {
+			user_pgd[pgd_index(VSYSCALL_START)] =
+				__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
+			ret = 0;
+		}
 
 		BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
 	}
@@ -977,6 +997,9 @@
 	pv_mmu_ops.release_pud = xen_release_pud;
 #endif
 
+#ifdef CONFIG_X86_64
+	SetPagePinned(virt_to_page(level3_user_vsyscall));
+#endif
 	xen_mark_init_mm_pinned();
 }
 
@@ -1088,6 +1111,15 @@
 	}
 
 	__native_set_fixmap(idx, pte);
+
+#ifdef CONFIG_X86_64
+	/* Replicate changes to map the vsyscall page into the user
+	   pagetable vsyscall mapping. */
+	if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+		unsigned long vaddr = __fix_to_virt(idx);
+		set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
+	}
+#endif
 }
 
 static const struct pv_info xen_info __initdata = {
@@ -1427,13 +1459,6 @@
 		BUG();
 }
 
-/*
- * Identity map, in addition to plain kernel map.  This needs to be
- * large enough to allocate page table pages to allocate the rest.
- * Each page can map 2MB.
- */
-static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
-
 static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 {
 	unsigned pmdidx, pteidx;
@@ -1533,6 +1558,7 @@
 	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
 	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
 



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 54 of 55] xen: implement Xen write_msr operation
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (52 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 53 of 55] xen64: set up userspace syscall patch Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-08 22:07 ` [PATCH 55 of 55] xen: update Kconfig to allow 64-bit Xen Jeremy Fitzhardinge
  2008-07-09 11:12 ` [PATCH 00 of 55] xen64: implement 64-bit Xen support Ingo Molnar
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

64-bit uses MSRs for important things like the bases for %fs- and
%gs-prefixed addresses.  It's more efficient to use a hypercall to
update these, rather than go via the trap-and-emulate path.

Other MSR writes are just passed through; in an unprivileged domain
they do nothing, but it might be useful later.
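
For the segment-base MSRs the resulting path is, schematically (a
sketch, not new code):

	wrmsrl(MSR_FS_BASE, base);
	  /* -> pv_cpu_ops.write_msr == xen_write_msr_safe()
	     -> HYPERVISOR_set_segment_base(SEGBASE_FS, base) */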

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |   31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -41,6 +41,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/fixmap.h>
 #include <asm/processor.h>
+#include <asm/msr-index.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
@@ -777,6 +778,34 @@
 	xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
 }
 
+static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
+{
+	int ret;
+
+	ret = 0;
+
+	switch(msr) {
+#ifdef CONFIG_X86_64
+		unsigned which;
+		u64 base;
+
+	case MSR_FS_BASE:		which = SEGBASE_FS; goto set;
+	case MSR_KERNEL_GS_BASE:	which = SEGBASE_GS_USER; goto set;
+	case MSR_GS_BASE:		which = SEGBASE_GS_KERNEL; goto set;
+
+	set:
+		base = ((u64)high << 32) | low;
+		if (HYPERVISOR_set_segment_base(which, base) != 0)
+			ret = -EFAULT;
+		break;
+#endif
+	default:
+		ret = native_write_msr_safe(msr, low, high);
+	}
+
+	return ret;
+}
+
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
 static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
@@ -1165,7 +1194,7 @@
 	.wbinvd = native_wbinvd,
 
 	.read_msr = native_read_msr_safe,
-	.write_msr = native_write_msr_safe,
+	.write_msr = xen_write_msr_safe,
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
 



^ permalink raw reply	[flat|nested] 80+ messages in thread

* [PATCH 55 of 55] xen: update Kconfig to allow 64-bit Xen
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (53 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 54 of 55] xen: implement Xen write_msr operation Jeremy Fitzhardinge
@ 2008-07-08 22:07 ` Jeremy Fitzhardinge
  2008-07-09 11:12 ` [PATCH 00 of 55] xen64: implement 64-bit Xen support Ingo Molnar
  55 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-08 22:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin, x86

Allow Xen to be enabled on 64-bit.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/Kconfig |    9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -6,8 +6,8 @@
 	bool "Xen guest support"
 	select PARAVIRT
 	select PARAVIRT_CLOCK
-	depends on X86_32
-	depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER)
+	depends on X86_64 || (X86_32 && X86_PAE && !(X86_VISWS || X86_VOYAGER))
+	depends on X86_CMPXCHG && X86_TSC
 	help
 	  This is the Linux Xen port.  Enabling this will allow the
 	  kernel to boot in a paravirtualized environment under the
@@ -15,10 +15,11 @@
 
 config XEN_MAX_DOMAIN_MEMORY
        int "Maximum allowed size of a domain in gigabytes"
-       default 8
+       default 8 if X86_32
+       default 32 if X86_64
        depends on XEN
        help
          The pseudo-physical to machine address array is sized
          according to the maximum possible memory size of a Xen
          domain.  This array uses 1 page per gigabyte, so there's no
-         need to be too stingy here.
\ No newline at end of file
+         need to be too stingy here.



^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 16 of 55] xen64: add extra pv_mmu_ops
  2008-07-08 22:06 ` [PATCH 16 of 55] xen64: add extra pv_mmu_ops Jeremy Fitzhardinge
@ 2008-07-09  7:55   ` Mark McLoughlin
  2008-07-09  8:02     ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 80+ messages in thread
From: Mark McLoughlin @ 2008-07-09  7:55 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ingo Molnar, LKML, x86, Stephen Tweedie, Eduardo Habkost

Hey Jeremy,

On Tue, 2008-07-08 at 15:06 -0700, Jeremy Fitzhardinge wrote:
> We need extra pv_mmu_ops for 64-bit, to deal with the extra level of
> pagetable.
> 
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
> ---
>  arch/x86/xen/enlighten.c   |   33 +++++++++++++++++++++++++++-
>  arch/x86/xen/mmu.c         |   51 +++++++++++++++++++++++++++++++++++++++++++-
>  arch/x86/xen/mmu.h         |   15 +++++++++++-
>  include/asm-x86/xen/page.h |    4 +++
>  4 files changed, 99 insertions(+), 4 deletions(-)
...
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -438,14 +438,19 @@
>  
>  void xen_set_pte(pte_t *ptep, pte_t pte)
>  {
> +#ifdef CONFIG_X86_PAE
>  	ptep->pte_high = pte.pte_high;
>  	smp_wmb();
>  	ptep->pte_low = pte.pte_low;
> +#else
> +	*ptep = pte;
> +#endif
>  }

You've dropped non-PAE support already, right? Any reason to use the
X86_PAE macro instead of X86_32?

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 16 of 55] xen64: add extra pv_mmu_ops
  2008-07-09  7:55   ` Mark McLoughlin
@ 2008-07-09  8:02     ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-09  8:02 UTC (permalink / raw)
  To: Mark McLoughlin; +Cc: Ingo Molnar, LKML, x86, Stephen Tweedie, Eduardo Habkost

Mark McLoughlin wrote:
> Hey Jeremy,
>
> On Tue, 2008-07-08 at 15:06 -0700, Jeremy Fitzhardinge wrote:
>   
>> We need extra pv_mmu_ops for 64-bit, to deal with the extra level of
>> pagetable.
>>
>> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
>> ---
>>  arch/x86/xen/enlighten.c   |   33 +++++++++++++++++++++++++++-
>>  arch/x86/xen/mmu.c         |   51 +++++++++++++++++++++++++++++++++++++++++++-
>>  arch/x86/xen/mmu.h         |   15 +++++++++++-
>>  include/asm-x86/xen/page.h |    4 +++
>>  4 files changed, 99 insertions(+), 4 deletions(-)
>>     
> ...
>   
>> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
>> --- a/arch/x86/xen/mmu.c
>> +++ b/arch/x86/xen/mmu.c
>> @@ -438,14 +438,19 @@
>>  
>>  void xen_set_pte(pte_t *ptep, pte_t pte)
>>  {
>> +#ifdef CONFIG_X86_PAE
>>  	ptep->pte_high = pte.pte_high;
>>  	smp_wmb();
>>  	ptep->pte_low = pte.pte_low;
>> +#else
>> +	*ptep = pte;
>> +#endif
>>  }
>>     
>
> You've dropped non-PAE support already, right? Any reason to use the
> X86_PAE macro instead of X86_32?
>   

It's a fine difference, but the specific thing I'm testing for is 
actually PAE vs 64-bit (or even more specifically "can I update a whole 
pte atomically?").  Testing for X86_32 is equivalent in the absence of 
non-PAE 32-bit, but not quite as correct.  The other way I could do it 
is as I have elsewhere:

	if (sizeof(pte_t) > sizeof(long)) {
		ptep->pte_high = pte.pte_high;
		smp_wmb();
		ptep->pte_low = pte.pte_low;
	} else
		*ptep = pte;

But there are other places where it tests for this using PAE (and even 
whole pvops which are only defined in the PAE case).

You'll notice other places where I test for PAGETABLE_LEVELS == 3 where 
the code is generally handling 3-level pagetables, and others where I 
test for PAE where it's something specific to PAE.

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support
  2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
                   ` (54 preceding siblings ...)
  2008-07-08 22:07 ` [PATCH 55 of 55] xen: update Kconfig to allow 64-bit Xen Jeremy Fitzhardinge
@ 2008-07-09 11:12 ` Ingo Molnar
  2008-07-09 11:16   ` [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
                     ` (3 more replies)
  55 siblings, 4 replies; 80+ messages in thread
From: Ingo Molnar @ 2008-07-09 11:12 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin


* Jeremy Fitzhardinge <jeremy@goop.org> wrote:

> Hi Ingo,
> 
> Here's the set of patches to implement 64-bit Xen support.
> 
> The first part of the series is some more (fairly minor) x86 arch 
> updates which were missed in the previous series.

thanks Jeremy, applied.

Since they depend on the generic-ipi infrastructure changes, i've 
created a separate, new tip/xen-64bit branch. The tip/x86/xen-64bit got 
merged into tip/x86/core and thus is headed for upstream. I have added 
your patches to the new topic branch - see the shortlog further below.

> Following that are the Xen-specific changes to implement 64-bit 
> support.  It works fairly well, but I know of a couple of bugs:
> 
>  - 32-bit emulation doesn't work properly yet.  Something goes wrong
>    with %gs, and a userspace %gs: reference segfaults.
> 
>  - It crashes when bringing up secondary CPUs under some combinations
>    of config.  I think it isn't quite setting up all the CPU sibling
>    topology stuff for the various scheduler policies.  It's trying to
>    set up the most simple of arrangements (every CPU is a singleton
>    with no shared cache or anything else).  It was quite tricky to
>    arrange this...
> 
> I hope to have followup patches to address both of these in the next 
> couple of days.  I expect both fixes will be small.

that's OK, it should only affect 64-bit Xen, correct?

Right now we need to get the generic impact of these patches tested; 
fixes to xen64 functionality can be done later on in an add-on manner.

Testing: i've done a trial merge of tip/xen-64bit to tip/master - not 
pushed out yet. If it holds up in testing i'll put the integration rule 
into tip/master.

tip/xen-64bit (which i've just pushed out) can be cleanly git-merged 
into tip/master, so if you send updates then please send them against such 
a merged tree - even if tip/master does not have tip/xen-64bit yet. 
These patches will need at least half a day of testing before i can push 
them out.

	Ingo

-------------------------->
Eduardo Habkost (6):
      x86/paravirt: call paravirt_pagetable_setup_{start, done}
      pvops-64: call paravirt_post_allocator_init() on setup_arch()
      xen64: xen_write_idt_entry() and cvt_gate_to_trap()
      Xen64: HYPERVISOR_set_segment_base() implementation
      xen64: implement xen_load_gs_index()
      xen64: Clear %fs on xen_load_tls()

Isaku Yamahata (2):
      xen-netfront: fix xennet_release_tx_bufs()
      xen: add xen_arch_resume()/xen_timer_resume hook for ia64 support

Jeremy Fitzhardinge (47):
      x86_64: there's no need to preallocate level1_fixmap_pgt
      x86: clean up formatting of __switch_to
      x86: use __page_aligned_data/bss
      x86_64: adjust exception frame in ia32entry
      x86_64: unstatic get_local_pda
      xen: print backtrace on multicall failure
      xen: define set_pte from the outset
      xen64: define asm/xen/interface for 64-bit
      xen: make ELF notes work for 32 and 64 bit
      xen: fix 64-bit hypercall variants
      xen64: fix calls into hypercall page
      xen64: add extra pv_mmu_ops
      xen64: random ifdefs to mask out 32-bit only code
      xen64: get active_mm from the pda
      xen: move smp setup into smp.c
      x86_64: add workaround for no %gs-based percpu
      xen64: smp.c compile hacking
      xen64: add xen-head code to head_64.S
      xen64: add asm-offsets
      xen64: add 64-bit assembler
      xen64: use set_fixmap for shared_info structure
      xen: cpu_detect is 32-bit only
      xen64: add hypervisor callbacks for events, etc
      xen64: early mapping setup
      xen64: 64-bit starts using set_pte from very early
      xen64: map an initial chunk of physical memory
      xen32: create initial mappings like 64-bit
      xen: fix truncation of machine address
      xen64: use arbitrary_virt_to_machine for xen_set_pmd
      xen: set num_processors
      xen64: defer setting pagetable alloc/release ops
      xen: use set_pte_vaddr
      xen64: deal with extra words Xen pushes onto exception frames
      xen64: add pvop for swapgs
      xen64: register callbacks in arch-independent way
      xen64: add identity irq->vector map
      xen: rework pgd_walk to deal with 32/64 bit
      xen: make sure the kernel command line is right
      suspend, xen: enable PM_SLEEP for CONFIG_XEN
      xen64: implement failsafe callback
      xen64: implement 64-bit update_descriptor
      xen64: save lots of registers
      xen64: allocate and manage user pagetables
      xen64: set up syscall and sysenter entrypoints for 64-bit
      xen64: set up userspace syscall patch
      xen: implement Xen write_msr operation
      xen: update Kconfig to allow 64-bit Xen


^ permalink raw reply	[flat|nested] 80+ messages in thread

* [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support)
  2008-07-09 11:12 ` [PATCH 00 of 55] xen64: implement 64-bit Xen support Ingo Molnar
@ 2008-07-09 11:16   ` Ingo Molnar
  2008-07-09 19:47     ` Ingo Molnar
  2008-07-09 11:21   ` [patch] xen64: fix !HVC_XEN build dependency (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 80+ messages in thread
From: Ingo Molnar @ 2008-07-09 11:16 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin,
	Rafael J. Wysocki


* Ingo Molnar <mingo@elte.hu> wrote:

> Testing: i've done a trial merge of tip/xen-64bit to tip/master - not 
> pushed out yet. If it holds up in testing i'll put the integration 
> rule into tip/master.
> 
> tip/xen-64bit (which i've just pushed out) can be cleanly git-merged 
> into tip/master, so if you send updates then please send them against 
> such a merged tree - even if tip/master does not have tip/xen-64bit 
> yet. These patches will need at least half a day of testing before i 
> can push them out.

find a build fixlet below, for this config:

  http://redhat.com/~mingo/misc/config-Wed_Jul__9_13_02_55_CEST_2008.bad

Rafael, do you agree with how we now can enable PM_SLEEP without having 
PM enabled? xen64 would like to have that - and it needs the fixlet 
below.

	Ingo

--------------->
commit 9280dc82a739c68945d61c80ba06bef6a9383c87
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jul 9 13:09:43 2008 +0200

    power, xen64: fix PM_SLEEP build dependencies
    
    fix:
    
    kernel/built-in.o: In function `do_fork':
    : undefined reference to `refrigerator'
    kernel/built-in.o: In function `do_signal_stop':
    signal.c:(.text+0x1b076): undefined reference to `refrigerator'
    kernel/built-in.o: In function `ptrace_stop':
    signal.c:(.text+0x1b285): undefined reference to `refrigerator'
    kernel/built-in.o: In function `get_signal_to_deliver':
    : undefined reference to `refrigerator'
    kernel/built-in.o: In function `worker_thread':
    workqueue.c:(.text+0x212c1): undefined reference to `refrigerator'
    [...]
    
    as CONFIG_PM_SLEEP can now be enabled without CONFIG_PM.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/Makefile b/kernel/Makefile
index 0a05120..6678a60 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_PM) += power/
+obj-$(CONFIG_PM_SLEEP) += power/
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [patch] xen64: fix !HVC_XEN build dependency (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support)
  2008-07-09 11:12 ` [PATCH 00 of 55] xen64: implement 64-bit Xen support Ingo Molnar
  2008-07-09 11:16   ` [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
@ 2008-07-09 11:21   ` Ingo Molnar
  2008-07-09 16:07     ` [patch] xen64: fix !HVC_XEN build dependency Jeremy Fitzhardinge
  2008-07-09 11:47   ` [patch] xen64: fix build error on 32-bit + !HIGHMEM (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
  2008-07-09 16:12   ` [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
  3 siblings, 1 reply; 80+ messages in thread
From: Ingo Molnar @ 2008-07-09 11:21 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin


another small build problem that -tip testing exposed, with this config:

  http://redhat.com/~mingo/misc/config-Wed_Jul__9_13_02_55_CEST_2008.bad

find the fix below.

	Ingo

------------------->
commit 615ec52c1c85f7bbbcced89761fcd89b2115865d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jul 9 13:15:03 2008 +0200

    xen64: fix !HVC_XEN build dependency
    
    fix:
    
    arch/x86/xen/built-in.o: In function `set_page_prot':
    enlighten.c:(.text+0x111d): undefined reference to `xen_raw_printk'
    arch/x86/xen/built-in.o: In function `xen_start_kernel':
    : undefined reference to `xen_raw_console_write'
    arch/x86/xen/built-in.o: In function `xen_start_kernel':
    : undefined reference to `xen_raw_console_write'
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h
index 98b79bc..c3adde3 100644
--- a/include/xen/hvc-console.h
+++ b/include/xen/hvc-console.h
@@ -5,11 +5,12 @@ extern struct console xenboot_console;
 
 #ifdef CONFIG_HVC_XEN
 void xen_console_resume(void);
+void xen_raw_console_write(const char *str);
+void xen_raw_printk(const char *fmt, ...);
 #else
 static inline void xen_console_resume(void) { }
+static inline void xen_raw_console_write(const char *str) { }
+static inline void xen_raw_printk(const char *fmt, ...) { }
 #endif
 
-void xen_raw_console_write(const char *str);
-void xen_raw_printk(const char *fmt, ...);
-
 #endif	/* XEN_HVC_CONSOLE_H */

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [patch] xen64: fix build error on 32-bit + !HIGHMEM  (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support)
  2008-07-09 11:12 ` [PATCH 00 of 55] xen64: implement 64-bit Xen support Ingo Molnar
  2008-07-09 11:16   ` [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
  2008-07-09 11:21   ` [patch] xen64: fix !HVC_XEN build dependency (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
@ 2008-07-09 11:47   ` Ingo Molnar
  2008-07-09 16:07     ` [patch] xen64: fix build error on 32-bit + !HIGHMEM Jeremy Fitzhardinge
  2008-07-09 16:12   ` [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
  3 siblings, 1 reply; 80+ messages in thread
From: Ingo Molnar @ 2008-07-09 11:47 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin


another small build fixlet, on 32-bit.

	Ingo

---------------->
commit c3d03b62eb73cb66415b7bb3031981c6a0cb90c3
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jul 9 13:45:33 2008 +0200

    xen64: fix build error on 32-bit + !HIGHMEM
    
    fix:
    
    arch/x86/xen/enlighten.c: In function 'xen_set_fixmap':
    arch/x86/xen/enlighten.c:1127: error: 'FIX_KMAP_BEGIN' undeclared (first use in this function)
    arch/x86/xen/enlighten.c:1127: error: (Each undeclared identifier is reported only once
    arch/x86/xen/enlighten.c:1127: error: for each function it appears in.)
    arch/x86/xen/enlighten.c:1127: error: 'FIX_KMAP_END' undeclared (first use in this function)
    make[1]: *** [arch/x86/xen/enlighten.o] Error 1
    make: *** [arch/x86/xen/enlighten.o] Error 2
    
    FIX_KMAP_BEGIN is only available on HIGHMEM.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 776c0fb..3da6acb 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1124,7 +1124,9 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
 #ifdef CONFIG_X86_32
 	case FIX_WP_TEST:
 	case FIX_VDSO:
+# ifdef CONFIG_HIGHMEM
 	case FIX_KMAP_BEGIN ... FIX_KMAP_END:
+# endif
 #else
 	case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
 #endif

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* Re: [patch] xen64: fix !HVC_XEN build dependency
  2008-07-09 11:21   ` [patch] xen64: fix !HVC_XEN build dependency (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
@ 2008-07-09 16:07     ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-09 16:07 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin

Ingo Molnar wrote:
> another small build problem that -tip testing exposed, with this config:
>
>   http://redhat.com/~mingo/misc/config-Wed_Jul__9_13_02_55_CEST_2008.bad
>
> find the fix below.
>   

Looks good, thanks.  Which reminds me, I should put printf formatting 
checks on those prototypes.
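
A minimal sketch, assuming plain GCC format attributes, of what that
would look like on the prototype from the patch above:

	/* let the compiler type-check the format string and arguments */
	void xen_raw_printk(const char *fmt, ...)
		__attribute__((format(printf, 1, 2)));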

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch] xen64: fix build error on 32-bit + !HIGHMEM
  2008-07-09 11:47   ` [patch] xen64: fix build error on 32-bit + !HIGHMEM (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
@ 2008-07-09 16:07     ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-09 16:07 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin

Ingo Molnar wrote:
> another small build fixlet, on 32-bit.
>
> 	Ingo
>
> ---------------->
> commit c3d03b62eb73cb66415b7bb3031981c6a0cb90c3
> Author: Ingo Molnar <mingo@elte.hu>
> Date:   Wed Jul 9 13:45:33 2008 +0200
>
>     xen64: fix build error on 32-bit + !HIGHMEM
>     
>     fix:
>     
>     arch/x86/xen/enlighten.c: In function 'xen_set_fixmap':
>     arch/x86/xen/enlighten.c:1127: error: 'FIX_KMAP_BEGIN' undeclared (first use in this function)
>     arch/x86/xen/enlighten.c:1127: error: (Each undeclared identifier is reported only once
>     arch/x86/xen/enlighten.c:1127: error: for each function it appears in.)
>     arch/x86/xen/enlighten.c:1127: error: 'FIX_KMAP_END' undeclared (first use in this function)
>     make[1]: *** [arch/x86/xen/enlighten.o] Error 1
>     make: *** [arch/x86/xen/enlighten.o] Error 2
>     
>     FIX_KMAP_BEGIN is only available on HIGHMEM.
>     
>     Signed-off-by: Ingo Molnar <mingo@elte.hu>
>   

Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support
  2008-07-09 11:12 ` [PATCH 00 of 55] xen64: implement 64-bit Xen support Ingo Molnar
                     ` (2 preceding siblings ...)
  2008-07-09 11:47   ` [patch] xen64: fix build error on 32-bit + !HIGHMEM (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
@ 2008-07-09 16:12   ` Jeremy Fitzhardinge
  3 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-09 16:12 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin

Ingo Molnar wrote:
> thanks Jeremy, applied.
>
> Since they depend on the generic-ipi infrastructure changes, i've 
> created a separate, new tip/xen-64bit branch. The tip/x86/xen-64bit got 
> merged into tip/x86/core and thus is headed for upstream. I have added 
> your patches to the new topic branch - see the shortlog further below.
>   

Speaking of generic-ipi, did you see the bugfix I posted for 
generic_smp_call_function_interrupt the other day?

I'm also seeing bad pointer dereferences in 
generic_smp_call_function_interrupt with some kernel configurations, which 
look like RCU failures (either oopses on following the next pointer in 
the list, or a bad function pointer for the call).  Other kernel configs 
are completely stable.  I haven't identified what the difference between 
working and not working is yet.

>> Following that are the Xen-specific changes to implement 64-bit 
>> support.  It works fairly well, but I know of a couple of bugs:
>>
>>  - 32-bit emulation doesn't work properly yet.  Something goes wrong
>>    with %gs, and a userspace %gs: reference segfaults.
>>
>>  - It crashes when bringing up secondary CPUs under some combinations
>>    of config.  I think it isn't quite setting up all the CPU sibling
>>    topology stuff for the various scheduler policies.  It's trying to
>>    set up the most simple of arrangements (every CPU is a singleton
>>    with no shared cache or anything else).  It was quite tricky to
>>    arrange this...
>>
>> I hope to have followup patches to address both of these in the next 
>> couple of days.  I expect both fixes will be small.
>>     
>
> that's OK, it should only affect 64-bit Xen, correct?
>   

Yep.  And if you avoid those two problem areas, it all seems pretty 
stable (16 vcpu kernbench runs with no problems, etc).

> Right now we need to get the generic impact of these patches tested; 
> fixes to xen64 functionality can be done later on in an add-on manner.
>
> Testing: i've done a trial merge of tip/xen-64bit to tip/master - not 
> pushed out yet. If it holds up in testing i'll put the integration rule 
> into tip/master.
>
> tip/xen-64bit (which i've just pushed out) can be cleanly git-merged 
> into tip/master, so if you send updates then please send them against such 
> a merged tree - even if tip/master does not have tip/xen-64bit yet. 
> These patches will need at least half a day of testing before i can push 
> them out.
>   

OK, thanks.

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support)
  2008-07-09 11:16   ` [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
@ 2008-07-09 19:47     ` Ingo Molnar
  2008-07-09 19:52       ` Ingo Molnar
  0 siblings, 1 reply; 80+ messages in thread
From: Ingo Molnar @ 2008-07-09 19:47 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin,
	Rafael J. Wysocki


-tip testing found another PM/PM_SLEEP victim:

 arch/x86/kernel/built-in.o: In function `suspend':
 apm_32.c:(.text+0xedb5): undefined reference to `save_processor_state'
 apm_32.c:(.text+0xedd0): undefined reference to `restore_processor_state'

with this config:
 
 http://redhat.com/~mingo/misc/config-Wed_Jul__9_18_53_45_CEST_2008.bad

haven't figured out yet what it's about, but the core scenario seems to 
be this combination (and only this combination) of config values:

  # CONFIG_PM is not set
  CONFIG_PM_SLEEP=y
  CONFIG_APM=y
  CONFIG_PARAVIRT=y
  CONFIG_XEN=y

( accordingly, this build failure only hit two test systems - the others
  were able to do 250 successful builds without ever hitting this. It
  needs to be fixed nevertheless, to keep the tests going. )

	Ingo

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support)
  2008-07-09 19:47     ` Ingo Molnar
@ 2008-07-09 19:52       ` Ingo Molnar
  2008-07-09 19:59         ` Rafael J. Wysocki
  0 siblings, 1 reply; 80+ messages in thread
From: Ingo Molnar @ 2008-07-09 19:52 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: LKML, x86, Stephen Tweedie, Eduardo Habkost, Mark McLoughlin,
	Rafael J. Wysocki


* Ingo Molnar <mingo@elte.hu> wrote:

> -tip testing found another PM/PM_SLEEP victim:
> 
>  arch/x86/kernel/built-in.o: In function `suspend':
>  apm_32.c:(.text+0xedb5): undefined reference to `save_processor_state'
>  apm_32.c:(.text+0xedd0): undefined reference to `restore_processor_state'

ah, found it - the arch-level power/ directory had a CONFIG_PM-only 
build dependency as well. Fixed via the commit below.

	Ingo

------------->
commit 6666fa5ebb7c78afcab540bf183c7f070c890930
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jul 9 21:45:54 2008 +0200

    power, xen64: fix PM_SLEEP build dependencies, #2
    
    -tip testing found another PM/PM_SLEEP victim:
    
     arch/x86/kernel/built-in.o: In function `suspend':
     apm_32.c:(.text+0xedb5): undefined reference to `save_processor_state'
     apm_32.c:(.text+0xedd0): undefined reference to `restore_processor_state'
    
    with this config:
    
     http://redhat.com/~mingo/misc/config-Wed_Jul__9_18_53_45_CEST_2008.bad
    
    the core scenario is this combination (and only this combination) of
    config values:
    
      # CONFIG_PM is not set
      CONFIG_PM_SLEEP=y
      CONFIG_APM=y
      CONFIG_PARAVIRT=y
      CONFIG_XEN=y
    
    the problem is that the .o file where these symbols live does get built
    on PM_SLEEP=y, but the directory itself was only dependent on CONFIG_PM.
    
    Fix it by adding another rule that also covers the CONFIG_PM_SLEEP case.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index fc698f2..4c85d09 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -175,6 +175,7 @@ drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
 
 # suspend and hibernation support
 drivers-$(CONFIG_PM) += arch/x86/power/
+drivers-$(CONFIG_PM_SLEEP) += arch/x86/power/
 
 ifeq ($(CONFIG_X86_32),y)
 drivers-$(CONFIG_FB) += arch/x86/video/

^ permalink raw reply related	[flat|nested] 80+ messages in thread

* Re: [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support)
  2008-07-09 19:52       ` Ingo Molnar
@ 2008-07-09 19:59         ` Rafael J. Wysocki
  2008-07-09 20:02           ` Rafael J. Wysocki
  0 siblings, 1 reply; 80+ messages in thread
From: Rafael J. Wysocki @ 2008-07-09 19:59 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Jeremy Fitzhardinge, LKML, x86, Stephen Tweedie, Eduardo Habkost,
	Mark McLoughlin

On Wednesday, 9 of July 2008, Ingo Molnar wrote:
> 
> * Ingo Molnar <mingo@elte.hu> wrote:
> 
> > -tip testing found another PM/PM_SLEEP victim:
> > 
> >  arch/x86/kernel/built-in.o: In function `suspend':
> >  apm_32.c:(.text+0xedb5): undefined reference to `save_processor_state'
> >  apm_32.c:(.text+0xedd0): undefined reference to `restore_processor_state'
> 
> ah, found it - the arch-level power/ directory had a CONFIG_PM-only 
> build dependency as well. Fixed via the commit below.
> 
> 	Ingo
> 
> ------------->
> commit 6666fa5ebb7c78afcab540bf183c7f070c890930
> Author: Ingo Molnar <mingo@elte.hu>
> Date:   Wed Jul 9 21:45:54 2008 +0200
> 
>     power, xen64: fix PM_SLEEP build dependencies, #2
>     
>     -tip testing found another PM/PM_SLEEP victim:
>     
>      arch/x86/kernel/built-in.o: In function `suspend':
>      apm_32.c:(.text+0xedb5): undefined reference to `save_processor_state'
>      apm_32.c:(.text+0xedd0): undefined reference to `restore_processor_state'
>     
>     with this config:
>     
>      http://redhat.com/~mingo/misc/config-Wed_Jul__9_18_53_45_CEST_2008.bad
>     
>     the core scenario is this combination (and only this combination) of
>     config values:
>     
>       # CONFIG_PM is not set
>       CONFIG_PM_SLEEP=y
>       CONFIG_APM=y
>       CONFIG_PARAVIRT=y
>       CONFIG_XEN=y

This combination actually doesn't make sense whatsoever.

PM_SLEEP depends (indirectly) on PM and the fact that it's possible to use
a .config violating this dependency is a build system problem, really.

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support)
  2008-07-09 19:59         ` Rafael J. Wysocki
@ 2008-07-09 20:02           ` Rafael J. Wysocki
  2008-07-09 20:04             ` Ingo Molnar
  0 siblings, 1 reply; 80+ messages in thread
From: Rafael J. Wysocki @ 2008-07-09 20:02 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Jeremy Fitzhardinge, LKML, x86, Stephen Tweedie, Eduardo Habkost,
	Mark McLoughlin

On Wednesday, 9 of July 2008, Rafael J. Wysocki wrote:
> On Wednesday, 9 of July 2008, Ingo Molnar wrote:
> > 
> > * Ingo Molnar <mingo@elte.hu> wrote:
> > 
> > > -tip testing found another PM/PM_SLEEP victim:
> > > 
> > >  arch/x86/kernel/built-in.o: In function `suspend':
> > >  apm_32.c:(.text+0xedb5): undefined reference to `save_processor_state'
> > >  apm_32.c:(.text+0xedd0): undefined reference to `restore_processor_state'
> > 
> > ah, found it - the arch-level power/ directory had a CONFIG_PM-only 
> > build dependency as well. Fixed via the commit below.
> > 
> > 	Ingo
> > 
> > ------------->
> > commit 6666fa5ebb7c78afcab540bf183c7f070c890930
> > Author: Ingo Molnar <mingo@elte.hu>
> > Date:   Wed Jul 9 21:45:54 2008 +0200
> > 
> >     power, xen64: fix PM_SLEEP build dependencies, #2
> >     
> >     -tip testing found another PM/PM_SLEEP victim:
> >     
> >      arch/x86/kernel/built-in.o: In function `suspend':
> >      apm_32.c:(.text+0xedb5): undefined reference to `save_processor_state'
> >      apm_32.c:(.text+0xedd0): undefined reference to `restore_processor_state'
> >     
> >     with this config:
> >     
> >      http://redhat.com/~mingo/misc/config-Wed_Jul__9_18_53_45_CEST_2008.bad
> >     
> >     the core scenario is this combination (and only this combination) of
> >     config values:
> >     
> >       # CONFIG_PM is not set
> >       CONFIG_PM_SLEEP=y
> >       CONFIG_APM=y
> >       CONFIG_PARAVIRT=y
> >       CONFIG_XEN=y
> 
> This combination actually doesn't make sense whatsoever.
> 
> PM_SLEEP depends (indirectly) on PM and the fact that it's possible to use
> a .config violating this dependency is a build system problem, really.

Your patch is correct, though. :-)

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support)
  2008-07-09 20:02           ` Rafael J. Wysocki
@ 2008-07-09 20:04             ` Ingo Molnar
  2008-07-09 20:17               ` [patch] power, xen64: fix PM_SLEEP build dependencies Jeremy Fitzhardinge
  2008-07-09 20:17               ` [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Rafael J. Wysocki
  0 siblings, 2 replies; 80+ messages in thread
From: Ingo Molnar @ 2008-07-09 20:04 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Jeremy Fitzhardinge, LKML, x86, Stephen Tweedie, Eduardo Habkost,
	Mark McLoughlin


* Rafael J. Wysocki <rjw@sisk.pl> wrote:

> > This combination actually doesn't make sense whatsoever.
> > 
> > PM_SLEEP depends (indirectly) on PM and the fact that it's possible 
> > to use a .config violating this dependency is a build system 
> > problem, really.
> 
> Your patch is correct, though. :-)

yes, that combination doesn't make sense in -git, but tip/xen64 tries the 
!PM && PM_SLEEP combination - see the patch below. What do you think 
about that patch?

I think Jeremy's patch makes sense, but no strong feelings. We can 
certainly map out these side-effects. (the fixes you've been Cc:-ed to 
should be roughly all that can happen i think.)

	Ingo

------------------->
commit 6fbbec428c8e7bb617da2e8a589af2e97bcf3bc4
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Tue Jul 8 15:07:08 2008 -0700

    suspend, xen: enable PM_SLEEP for CONFIG_XEN
    
    Xen save/restore requires PM_SLEEP to be set without requiring
    SUSPEND or HIBERNATION.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Cc: Stephen Tweedie <sct@redhat.com>
    Cc: Eduardo Habkost <ehabkost@redhat.com>
    Cc: Mark McLoughlin <markmc@redhat.com>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index b45da40..1436c47 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -82,7 +82,7 @@ config PM_SLEEP_SMP
 
 config PM_SLEEP
 	bool
-	depends on SUSPEND || HIBERNATION
+	depends on SUSPEND || HIBERNATION || XEN
 	default y
 
 config SUSPEND


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* Re: [patch] power, xen64: fix PM_SLEEP build dependencies
  2008-07-09 20:04             ` Ingo Molnar
@ 2008-07-09 20:17               ` Jeremy Fitzhardinge
  2008-07-09 20:17               ` [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Rafael J. Wysocki
  1 sibling, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-09 20:17 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rafael J. Wysocki, LKML, x86, Stephen Tweedie, Eduardo Habkost,
	Mark McLoughlin

Ingo Molnar wrote:
> * Rafael J. Wysocki <rjw@sisk.pl> wrote:
>
>   
>>> This combination actually doesn't make sense whatsoever.
>>>
>>> PM_SLEEP depends (indirectly) on PM and the fact that it's possible 
>>> to use a .config violating this dependency is a build system 
>>> problem, really.
>>>       
>> Your patch is correct, though. :-)
>>     
>
> yes, that combination doesn't make sense in -git, but tip/xen64 tries the 
> !PM && PM_SLEEP combination - see the patch below. What do you think 
> about that patch?
>
> I think Jeremy's patch makes sense, but no strong feelings. We can 
> certainly map out these side-effects. (the fixes you've been Cc:-ed to 
> should be roughly all that can happen i think.)
>   

I found the PM dependencies a bit confusing.  Xen save/restore/migrate 
is functionally equivalent to some mixture of HIBERNATION and SUSPEND (saving 
an image to disk is HIBERNATION, but live migration is more like 
SUSPEND).  Its implementation needs to be able to get the device model 
to do the right things - specifically the system timer devices - but has 
no other dependency on the rest of the PM infrastructure.

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support)
  2008-07-09 20:04             ` Ingo Molnar
  2008-07-09 20:17               ` [patch] power, xen64: fix PM_SLEEP build dependencies Jeremy Fitzhardinge
@ 2008-07-09 20:17               ` Rafael J. Wysocki
  2008-07-09 20:23                 ` [patch] power, xen64: fix PM_SLEEP build dependencies Jeremy Fitzhardinge
  2008-07-09 20:23                 ` [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
  1 sibling, 2 replies; 80+ messages in thread
From: Rafael J. Wysocki @ 2008-07-09 20:17 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Jeremy Fitzhardinge, LKML, x86, Stephen Tweedie, Eduardo Habkost,
	Mark McLoughlin

On Wednesday, 9 of July 2008, Ingo Molnar wrote:
> 
> * Rafael J. Wysocki <rjw@sisk.pl> wrote:
> 
> > > This combination actually doesn't make sense whatsoever.
> > > 
> > > PM_SLEEP depends (indirectly) on PM and the fact that it's possible 
> > > to use a .config violating this dependency is a build system 
> > > problem, really.
> > 
> > Your patch is correct, though. :-)
> 
> yes, that combination doesn't make sense in -git, but tip/xen64 tries the 
> !PM && PM_SLEEP combination - see the patch below.

It shouldn't.  There are many things compiled if PM is set that PM_SLEEP
depends on, most importantly ->suspend() and ->resume() callbacks of almost
all drivers.  Ignoring this dependency is asking for trouble.

> What do you think about that patch?
> 
> I think Jeremy's patch makes sense, but no strong feelings. We can 
> certainly map out these side-effects. (the fixes you've been Cc:-ed to 
> should be roughly all that can happen i think.)
 
> ------------------->
> commit 6fbbec428c8e7bb617da2e8a589af2e97bcf3bc4
> Author: Jeremy Fitzhardinge <jeremy@goop.org>
> Date:   Tue Jul 8 15:07:08 2008 -0700
> 
>     suspend, xen: enable PM_SLEEP for CONFIG_XEN
>     
>     Xen save/restore requires PM_SLEEP to be set without requiring
>     SUSPEND or HIBERNATION.
>     
>     Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
>     Cc: Stephen Tweedie <sct@redhat.com>
>     Cc: Eduardo Habkost <ehabkost@redhat.com>
>     Cc: Mark McLoughlin <markmc@redhat.com>
>     Signed-off-by: Ingo Molnar <mingo@elte.hu>
> 
> diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
> index b45da40..1436c47 100644
> --- a/kernel/power/Kconfig
> +++ b/kernel/power/Kconfig
> @@ -82,7 +82,7 @@ config PM_SLEEP_SMP
>  
>  config PM_SLEEP
>  	bool
> -	depends on SUSPEND || HIBERNATION
> +	depends on SUSPEND || HIBERNATION || XEN
>  	default y
>  
>  config SUSPEND

I don't like this.

I'll have a look at the remaining related patches and see what can be done.

Jeremy, what exactly do you need the !PM && PM_SLEEP combination for, BTW?

Rafael

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch] power, xen64: fix PM_SLEEP build dependencies
  2008-07-09 20:17               ` [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Rafael J. Wysocki
@ 2008-07-09 20:23                 ` Jeremy Fitzhardinge
  2008-07-09 20:26                   ` Ingo Molnar
  2008-07-09 20:33                   ` Rafael J. Wysocki
  2008-07-09 20:23                 ` [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
  1 sibling, 2 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-09 20:23 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Ingo Molnar, LKML, x86, Stephen Tweedie, Eduardo Habkost,
	Mark McLoughlin

Rafael J. Wysocki wrote:
> On Wednesday, 9 of July 2008, Ingo Molnar wrote:
>   
>> * Rafael J. Wysocki <rjw@sisk.pl> wrote:
>>
>>     
>>>> This combination actually doesn't make sense whatsoever.
>>>>
>>>> PM_SLEEP depends (indirectly) on PM and the fact that it's possible 
>>>> to use a .config violating this dependency is a build system 
>>>> problem, really.
>>>>         
>>> Your patch is correct, though. :-)
>>>       
>> yes, that combination doesn't make sense in -git, but tip/xen64 tries the 
>> !PM && PM_SLEEP combination - see the patch below.
>>     
>
> It shouldn't.  There are many things compiled if PM is set that PM_SLEEP
> depends on, most importantly ->suspend() and ->resume() callbacks of almost
> all drivers.  Ignoring this dependency is asking for trouble.
>   

Doesn't that mean that PM_SLEEP should depend on PM?

> I don't like this.
>
> I'll have a look at the remaining related patches and see what can be done.
>
> Jeremy, what exactly do you need the !PM && PM_SLEEP combination for, BTW?
>   

See drivers/xen/manage.c.  On the Xen save/restore path, I call 
device_(suspend|resume|power_down|power_up)(), primarily to get the 
system timers into a good state after the machine has been away for a 
while.  Xen doesn't have a general dependency on the rest of the PM 
infrastructure.
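
Roughly, the shape of that path - a from-memory sketch rather than the
exact manage.c code, with error handling elided and the function name
illustrative:

	static void xen_save_restore_sketch(void)	/* illustrative name */
	{
		device_suspend(PMSG_SUSPEND);	/* quiesce all drivers */
		device_power_down(PMSG_SUSPEND);

		/* the hypervisor suspends the domain here; on resume
		   we may have been away for a while, or even been
		   live-migrated to different hardware */

		device_power_up();	/* e.g. resync the system timers */
		device_resume();
	}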

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support)
  2008-07-09 20:17               ` [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Rafael J. Wysocki
  2008-07-09 20:23                 ` [patch] power, xen64: fix PM_SLEEP build dependencies Jeremy Fitzhardinge
@ 2008-07-09 20:23                 ` Ingo Molnar
  2008-07-09 20:40                   ` [patch] power, xen64: fix PM_SLEEP build dependencies Jeremy Fitzhardinge
  1 sibling, 1 reply; 80+ messages in thread
From: Ingo Molnar @ 2008-07-09 20:23 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Jeremy Fitzhardinge, LKML, x86, Stephen Tweedie, Eduardo Habkost,
	Mark McLoughlin


* Rafael J. Wysocki <rjw@sisk.pl> wrote:

> >  config PM_SLEEP
> >  	bool
> > -	depends on SUSPEND || HIBERNATION
> > +	depends on SUSPEND || HIBERNATION || XEN
> >  	default y
> >  
> >  config SUSPEND
> 
> I don't like this.
> 
> I'll have a look at the remaining related patches and see what can be 
> done.
> 
> Jeremy, what exactly do you need the !PM && PM_SLEEP combination for, 
> BTW?

ok, i've reverted this from tip/master now, until you and Jeremy work 
out the details. (it shouldn't be a big limitation on xen64 anyway)

	Ingo

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch] power, xen64: fix PM_SLEEP build dependencies
  2008-07-09 20:23                 ` [patch] power, xen64: fix PM_SLEEP build dependencies Jeremy Fitzhardinge
@ 2008-07-09 20:26                   ` Ingo Molnar
  2008-07-09 20:33                   ` Rafael J. Wysocki
  1 sibling, 0 replies; 80+ messages in thread
From: Ingo Molnar @ 2008-07-09 20:26 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Rafael J. Wysocki, LKML, x86, Stephen Tweedie, Eduardo Habkost,
	Mark McLoughlin


* Jeremy Fitzhardinge <jeremy@goop.org> wrote:

>> I'll have a look at the remaining related patches and see what can be 
>> done.
>>
>> Jeremy, what exactly do you need the !PM && PM_SLEEP combination for, 
>> BTW?
>
> See drivers/xen/manage.c.  On the Xen save/restore path, I call 
> device_(suspend|resume|power_down|power_up)(), primarily to get the 
> system timers into a good state after the machine has been away for a 
> while.  Xen doesn't have a general dependency on the rest of the PM 
> infrastructure.

see the last config link i sent - even with my fixes i couldn't get it to 
work. So there were quite a few dependencies, i think.

	Ingo

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch] power, xen64: fix PM_SLEEP build dependencies
  2008-07-09 20:23                 ` [patch] power, xen64: fix PM_SLEEP build dependencies Jeremy Fitzhardinge
  2008-07-09 20:26                   ` Ingo Molnar
@ 2008-07-09 20:33                   ` Rafael J. Wysocki
  2008-07-09 20:39                     ` Jeremy Fitzhardinge
  1 sibling, 1 reply; 80+ messages in thread
From: Rafael J. Wysocki @ 2008-07-09 20:33 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ingo Molnar, LKML, x86, Stephen Tweedie, Eduardo Habkost,
	Mark McLoughlin

On Wednesday, 9 of July 2008, Jeremy Fitzhardinge wrote:
> Rafael J. Wysocki wrote:
> > On Wednesday, 9 of July 2008, Ingo Molnar wrote:
> >   
> >> * Rafael J. Wysocki <rjw@sisk.pl> wrote:
> >>
> >>     
> >>>> This combination actually doesn't make sense whatsoever.
> >>>>
> >>>> PM_SLEEP depends (indirectly) on PM and the fact that it's possible 
> >>>> to use a .config violating this dependency is a build system 
> >>>> problem, really.
> >>>>         
> >>> Your patch is correct, though. :-)
> >>>       
> >> yes, that combination doesn't make sense in -git, but tip/xen64 tries the 
> >> !PM && PM_SLEEP combination - see the patch below.
> >>     
> >
> > It shouldn't.  There are many things compiled if PM is set that PM_SLEEP
> > depends on, most importantly ->suspend() and ->resume() callbacks of almost
> > all drivers.  Ignoring this dependency is asking for trouble.
> >   
> 
> Doesn't that mean that PM_SLEEP should depend on PM?

It does.  And it depends on PM, because HIBERNATION and SUSPEND both depend
on PM.

> > I don't like this.
> >
> > I'll have a look at the remaining related patches and see what can be done.
> >
> > Jeremy, what exactly do you need the !PM && PM_SLEEP combination for, BTW?
> >   
> 
> See drivers/xen/manage.c.  On the Xen save/restore path, I call 
> device_(suspend|resume|power_down|power_up)(), primarily to get the 
> system timers into a good state after the machine has been away for a 
> while.  Xen doesn't have a general dependency on the rest of the PM 
> infrastructure.

Well, device drivers' ->suspend() and ->resume() callbacks generally depend on
PM instead of PM_SLEEP (historical thing), so in fact you need the dependency
on PM.

In future we may be able to clean that up, but only after the new
suspend/hibernation framework reaches the mainline and drivers start to use it.

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch] power, xen64: fix PM_SLEEP build dependencies
  2008-07-09 20:33                   ` Rafael J. Wysocki
@ 2008-07-09 20:39                     ` Jeremy Fitzhardinge
  2008-07-09 20:42                       ` Ingo Molnar
  2008-07-09 20:53                       ` Rafael J. Wysocki
  0 siblings, 2 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-09 20:39 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Ingo Molnar, LKML, x86, Stephen Tweedie, Eduardo Habkost,
	Mark McLoughlin

Rafael J. Wysocki wrote:
> It does.  And it depends on PM, because HIBERNATION and SUSPEND both depend
> on PM.
>   

OK, so can we change PM_SLEEP to depend on PM && (HIBERNATION || SUSPEND 
|| XEN)?

Or I could add a separate XEN_SAVE_RESTORE config which depends on PM, 
and then we make PM_SLEEP depend on XEN_SAVE_RESTORE.
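
For concreteness, a sketch of that second variant (the symbol name is
just a proposal at this point):

	config XEN_SAVE_RESTORE
		bool
		depends on XEN && PM
		default y

	config PM_SLEEP
		bool
		depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
		default y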

    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch] power, xen64: fix PM_SLEEP build dependencies
  2008-07-09 20:23                 ` [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
@ 2008-07-09 20:40                   ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 80+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-09 20:40 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rafael J. Wysocki, LKML, x86, Stephen Tweedie, Eduardo Habkost,
	Mark McLoughlin

Ingo Molnar wrote:
> ok, i've reverted this from tip/master now, until you and Jeremy work 
> out the details. (it shouldn't be a big limitation on xen64 anyway)
>   

Yes, that's fine.  It just means you lose Xen save/restore unless you 
also have SUSPEND or HIBERNATION enabled, which is OK.  I only found it 
because I'm currently using an allnoconfig config with just enough to 
make Xen work turned on.


    J

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch] power, xen64: fix PM_SLEEP build dependencies
  2008-07-09 20:39                     ` Jeremy Fitzhardinge
@ 2008-07-09 20:42                       ` Ingo Molnar
  2008-07-09 20:53                       ` Rafael J. Wysocki
  1 sibling, 0 replies; 80+ messages in thread
From: Ingo Molnar @ 2008-07-09 20:42 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Rafael J. Wysocki, LKML, x86, Stephen Tweedie, Eduardo Habkost,
	Mark McLoughlin


* Jeremy Fitzhardinge <jeremy@goop.org> wrote:

> Rafael J. Wysocki wrote:
>> It does.  And it depends on PM, because HIBERNATION and SUSPEND both depend
>> on PM.
>>   
>
> OK, so can we change PM_SLEEP to depend on PM && (HIBERNATION || SUSPEND
> || XEN)?
>
> Or I could add a separate XEN_SAVE_RESTORE config which depends on PM, 
> and then we make PM_SLEEP depend on XEN_SAVE_RESTORE.

another way would be to select PM_SLEEP if PM is enabled, from within 
XEN. (this is one of the rare cases where select is valid - as PM_SLEEP 
is not an interactive option)
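
e.g. something like this in the Xen Kconfig entry (sketch only,
untested):

	config XEN
		bool "Xen guest support"
		select PM_SLEEP if PM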

	Ingo

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [patch] power, xen64: fix PM_SLEEP build dependencies
  2008-07-09 20:39                     ` Jeremy Fitzhardinge
  2008-07-09 20:42                       ` Ingo Molnar
@ 2008-07-09 20:53                       ` Rafael J. Wysocki
  1 sibling, 0 replies; 80+ messages in thread
From: Rafael J. Wysocki @ 2008-07-09 20:53 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ingo Molnar, LKML, x86, Stephen Tweedie, Eduardo Habkost,
	Mark McLoughlin

On Wednesday, 9 of July 2008, Jeremy Fitzhardinge wrote:
> Rafael J. Wysocki wrote:
> > It does.  And it depends on PM, because HIBERNATION and SUSPEND both depend
> > on PM.
> >   
> 
> OK, so can we change PM_SLEEP to depend on PM && (HIBERNATION || SUSPEND 
> || XEN)?

I'd rather like it to be (HIBERNATION || SUSPEND || (XEN && PM)), but that would be
slightly confusing IMO.
 
> Or I could add a separate XEN_SAVE_RESTORE config which depends on PM, 
> and then we make PM_SLEEP depend on XEN_SAVE_RESTORE.

I prefer this one.

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 80+ messages in thread

end of thread

Thread overview: 80+ messages
2008-07-08 22:06 [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 01 of 55] x86/paravirt: Call paravirt_pagetable_setup_{start, done} Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 02 of 55] pvops-64: call paravirt_post_allocator_init() on setup_arch() Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 03 of 55] x86_64: there's no need to preallocate level1_fixmap_pgt Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 04 of 55] x86: clean up formatting of __switch_to Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 05 of 55] x86: use __page_aligned_data/bss Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 06 of 55] x86_64: adjust exception frame in ia32entry Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 07 of 55] x86_64: unstatic get_local_pda Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 08 of 55] xen: print backtrace on multicall failure Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 09 of 55] xen-netfront: fix xennet_release_tx_bufs() Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 10 of 55] xen: add xen_arch_resume()/xen_timer_resume hook for ia64 support Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 11 of 55] xen: define set_pte from the outset Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 12 of 55] xen64: define asm/xen/interface for 64-bit Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 13 of 55] xen: make ELF notes work for 32 and 64 bit Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 14 of 55] xen: fix 64-bit hypercall variants Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 15 of 55] xen64: fix calls into hypercall page Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 16 of 55] xen64: add extra pv_mmu_ops Jeremy Fitzhardinge
2008-07-09  7:55   ` Mark McLoughlin
2008-07-09  8:02     ` Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 17 of 55] xen64: random ifdefs to mask out 32-bit only code Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 18 of 55] xen64: get active_mm from the pda Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 19 of 55] xen: move smp setup into smp.c Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 20 of 55] x86_64: add workaround for no %gs-based percpu Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 21 of 55] xen64: smp.c compile hacking Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 22 of 55] xen64: add xen-head code to head_64.S Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 23 of 55] xen64: add asm-offsets Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 24 of 55] xen64: add 64-bit assembler Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 25 of 55] xen64: use set_fixmap for shared_info structure Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 26 of 55] xen: cpu_detect is 32-bit only Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 27 of 55] xen64: add hypervisor callbacks for events, etc Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 28 of 55] xen64: early mapping setup Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 29 of 55] xen64: 64-bit starts using set_pte from very early Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 30 of 55] xen64: map an initial chunk of physical memory Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 31 of 55] xen32: create initial mappings like 64-bit Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 32 of 55] xen: fix truncation of machine address Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 33 of 55] xen64: use arbitrary_virt_to_machine for xen_set_pmd Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 34 of 55] xen: set num_processors Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 35 of 55] xen64: defer setting pagetable alloc/release ops Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 36 of 55] xen: use set_pte_vaddr Jeremy Fitzhardinge
2008-07-08 22:06 ` [PATCH 37 of 55] xen64: xen_write_idt_entry() and cvt_gate_to_trap() Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 38 of 55] xen64: deal with extra words Xen pushes onto exception frames Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 39 of 55] xen64: add pvop for swapgs Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 40 of 55] xen64: register callbacks in arch-independent way Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 41 of 55] xen64: add identity irq->vector map Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 42 of 55] Xen64: HYPERVISOR_set_segment_base() implementation Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 43 of 55] xen64: implement xen_load_gs_index() Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 44 of 55] xen: rework pgd_walk to deal with 32/64 bit Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 45 of 55] xen: make sure the kernel command line is right Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 46 of 55] xen: enable PM_SLEEP for CONFIG_XEN Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 47 of 55] xen64: implement failsafe callback Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 48 of 55] xen64: Clear %fs on xen_load_tls() Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 49 of 55] xen64: implement 64-bit update_descriptor Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 50 of 55] xen64: save lots of registers Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 51 of 55] xen64: allocate and manage user pagetables Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 52 of 55] xen64: set up syscall and sysenter entrypoints for 64-bit Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 53 of 55] xen64: set up userspace syscall patch Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 54 of 55] xen: implement Xen write_msr operation Jeremy Fitzhardinge
2008-07-08 22:07 ` [PATCH 55 of 55] xen: update Kconfig to allow 64-bit Xen Jeremy Fitzhardinge
2008-07-09 11:12 ` [PATCH 00 of 55] xen64: implement 64-bit Xen support Ingo Molnar
2008-07-09 11:16   ` [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
2008-07-09 19:47     ` Ingo Molnar
2008-07-09 19:52       ` Ingo Molnar
2008-07-09 19:59         ` Rafael J. Wysocki
2008-07-09 20:02           ` Rafael J. Wysocki
2008-07-09 20:04             ` Ingo Molnar
2008-07-09 20:17               ` [patch] power, xen64: fix PM_SLEEP build dependencies Jeremy Fitzhardinge
2008-07-09 20:17               ` [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Rafael J. Wysocki
2008-07-09 20:23                 ` [patch] power, xen64: fix PM_SLEEP build dependencies Jeremy Fitzhardinge
2008-07-09 20:26                   ` Ingo Molnar
2008-07-09 20:33                   ` Rafael J. Wysocki
2008-07-09 20:39                     ` Jeremy Fitzhardinge
2008-07-09 20:42                       ` Ingo Molnar
2008-07-09 20:53                       ` Rafael J. Wysocki
2008-07-09 20:23                 ` [patch] power, xen64: fix PM_SLEEP build dependencies (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
2008-07-09 20:40                   ` [patch] power, xen64: fix PM_SLEEP build dependencies Jeremy Fitzhardinge
2008-07-09 11:21   ` [patch] xen64: fix !HVC_XEN build dependency (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
2008-07-09 16:07     ` [patch] xen64: fix !HVC_XEN build dependency Jeremy Fitzhardinge
2008-07-09 11:47   ` [patch] xen64: fix build error on 32-bit + !HIGHMEM (was: Re: [PATCH 00 of 55] xen64: implement 64-bit Xen support) Ingo Molnar
2008-07-09 16:07     ` [patch] xen64: fix build error on 32-bit + !HIGHMEM Jeremy Fitzhardinge
2008-07-09 16:12   ` [PATCH 00 of 55] xen64: implement 64-bit Xen support Jeremy Fitzhardinge
