linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [GIT PULL] x86/paravirt: allow preemption while doing lazy mmu update
       [not found]         ` <49C45238.7050007@zytor.com>
@ 2009-03-23 17:38           ` Jeremy Fitzhardinge
  2009-03-23 17:45           ` [GIT PULL] core Xen updates for 2.6.30 Jeremy Fitzhardinge
                             ` (28 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 17:38 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ingo Molnar, the arch/x86 maintainers, Linux Kernel Mailing List,
	Xen-devel, Nick Piggin, Peter Zijlstra, Alok Kataria,
	Rusty Russell, Marcelo Tosatti

This series allows preemption during lazy mmu updates.

No changes from previous postings, just rebased to a newer tip/master.

The following changes since commit 8c85122f33c5242e2f384b1f9f202f28e6bf4e61:
  Ingo Molnar (1):
        Merge branch 'core/locking'

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push2/x86/paravirt

Jeremy Fitzhardinge (8):
      mm: disable preemption in apply_to_pte_range
      x86/paravirt: remove lazy mode in interrupts
      x86/pvops: replace arch_enter_lazy_cpu_mode with arch_start_context_switch
      x86/paravirt: flush pending mmu updates on context switch
      x86/paravirt: finish change from lazy cpu to context switch start/end
      x86/paravirt: allow preemption with lazy mmu mode
      mm: allow preemption in apply_to_pte_range
      x86/paravirt: use percpu_ rather than __get_cpu_var

 arch/x86/include/asm/paravirt.h    |   22 +++++++-------
 arch/x86/include/asm/pgtable.h     |    2 +
 arch/x86/include/asm/thread_info.h |    2 +
 arch/x86/kernel/kvm.c              |    2 +-
 arch/x86/kernel/paravirt.c         |   56 ++++++++++++++++-------------------
 arch/x86/kernel/process_32.c       |    2 +-
 arch/x86/kernel/process_64.c       |    2 +-
 arch/x86/kernel/vmi_32.c           |   20 ++++++++----
 arch/x86/lguest/boot.c             |   16 +++++++---
 arch/x86/mm/fault.c                |    6 +--
 arch/x86/mm/highmem_32.c           |    2 -
 arch/x86/mm/iomap_32.c             |    2 -
 arch/x86/mm/pageattr.c             |   14 ---------
 arch/x86/xen/enlighten.c           |   10 ++----
 arch/x86/xen/mmu.c                 |   20 +++++--------
 arch/x86/xen/xen-ops.h             |    1 -
 include/asm-frv/pgtable.h          |    4 +-
 include/asm-generic/pgtable.h      |   21 +++++++------
 kernel/sched.c                     |    2 +-
 19 files changed, 96 insertions(+), 110 deletions(-)

Overall diff:

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 7727aa8..bc384be 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -56,6 +56,7 @@ struct desc_ptr;
 struct tss_struct;
 struct mm_struct;
 struct desc_struct;
+struct task_struct;
 
 /*
  * Wrapper type for pointers to code which uses the non-standard
@@ -203,7 +204,8 @@ struct pv_cpu_ops {
 
 	void (*swapgs)(void);
 
-	struct pv_lazy_ops lazy_mode;
+	void (*start_context_switch)(struct task_struct *prev);
+	void (*end_context_switch)(struct task_struct *next);
 };
 
 struct pv_irq_ops {
@@ -1399,25 +1401,23 @@ enum paravirt_lazy_mode {
 };
 
 enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
-void paravirt_enter_lazy_cpu(void);
-void paravirt_leave_lazy_cpu(void);
+void paravirt_start_context_switch(struct task_struct *prev);
+void paravirt_end_context_switch(struct task_struct *next);
+
 void paravirt_enter_lazy_mmu(void);
 void paravirt_leave_lazy_mmu(void);
-void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
 
-#define  __HAVE_ARCH_ENTER_LAZY_CPU_MODE
-static inline void arch_enter_lazy_cpu_mode(void)
+#define  __HAVE_ARCH_START_CONTEXT_SWITCH
+static inline void arch_start_context_switch(struct task_struct *prev)
 {
-	PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
+	PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev);
 }
 
-static inline void arch_leave_lazy_cpu_mode(void)
+static inline void arch_end_context_switch(struct task_struct *next)
 {
-	PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
+	PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
 }
 
-void arch_flush_lazy_cpu_mode(void);
-
 #define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
 static inline void arch_enter_lazy_mmu_mode(void)
 {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index f5ba6c1..205f6a9 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -81,6 +81,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
 #define pte_val(x)	native_pte_val(x)
 #define __pte(x)	native_make_pte(x)
 
+#define arch_end_context_switch(prev)	do {} while(0)
+
 #endif	/* CONFIG_PARAVIRT */
 
 /*
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 83d2b73..e6b4beb 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -96,6 +96,7 @@ struct thread_info {
 #define TIF_DEBUGCTLMSR		25	/* uses thread_struct.debugctlmsr */
 #define TIF_DS_AREA_MSR		26      /* uses thread_struct.ds_area_msr */
 #define TIF_SYSCALL_FTRACE	27	/* for ftrace syscall instrumentation */
+#define TIF_LAZY_MMU_UPDATES	28	/* task is updating the mmu lazily */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -119,6 +120,7 @@ struct thread_info {
 #define _TIF_DEBUGCTLMSR	(1 << TIF_DEBUGCTLMSR)
 #define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
 #define _TIF_SYSCALL_FTRACE	(1 << TIF_SYSCALL_FTRACE)
+#define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
 
 /* work to do in syscall_trace_enter() */
 #define _TIF_WORK_SYSCALL_ENTRY	\
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 33019dd..6551ded 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -195,7 +195,7 @@ static void kvm_leave_lazy_mmu(void)
 	struct kvm_para_state *state = kvm_para_state();
 
 	mmu_queue_flush(state);
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
+	paravirt_leave_lazy_mmu();
 	state->mode = paravirt_get_lazy_mode();
 }
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 8e45f44..aa34423 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -246,18 +246,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA
 
 static inline void enter_lazy(enum paravirt_lazy_mode mode)
 {
-	BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
-	BUG_ON(preemptible());
+	BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
 
-	__get_cpu_var(paravirt_lazy_mode) = mode;
+	percpu_write(paravirt_lazy_mode, mode);
 }
 
-void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
+static void leave_lazy(enum paravirt_lazy_mode mode)
 {
-	BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode);
-	BUG_ON(preemptible());
+	BUG_ON(percpu_read(paravirt_lazy_mode) != mode);
 
-	__get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
+	percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
 }
 
 void paravirt_enter_lazy_mmu(void)
@@ -267,22 +265,36 @@ void paravirt_enter_lazy_mmu(void)
 
 void paravirt_leave_lazy_mmu(void)
 {
-	paravirt_leave_lazy(PARAVIRT_LAZY_MMU);
+	leave_lazy(PARAVIRT_LAZY_MMU);
 }
 
-void paravirt_enter_lazy_cpu(void)
+void paravirt_start_context_switch(struct task_struct *prev)
 {
+	BUG_ON(preemptible());
+
+	if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
+		arch_leave_lazy_mmu_mode();
+		set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
+	}
 	enter_lazy(PARAVIRT_LAZY_CPU);
 }
 
-void paravirt_leave_lazy_cpu(void)
+void paravirt_end_context_switch(struct task_struct *next)
 {
-	paravirt_leave_lazy(PARAVIRT_LAZY_CPU);
+	BUG_ON(preemptible());
+
+	leave_lazy(PARAVIRT_LAZY_CPU);
+
+	if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
+		arch_enter_lazy_mmu_mode();
 }
 
 enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 {
-	return __get_cpu_var(paravirt_lazy_mode);
+	if (in_interrupt())
+		return PARAVIRT_LAZY_NONE;
+
+	return percpu_read(paravirt_lazy_mode);
 }
 
 void arch_flush_lazy_mmu_mode(void)
@@ -290,7 +302,6 @@ void arch_flush_lazy_mmu_mode(void)
 	preempt_disable();
 
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
-		WARN_ON(preempt_count() == 1);
 		arch_leave_lazy_mmu_mode();
 		arch_enter_lazy_mmu_mode();
 	}
@@ -298,19 +309,6 @@ void arch_flush_lazy_mmu_mode(void)
 	preempt_enable();
 }
 
-void arch_flush_lazy_cpu_mode(void)
-{
-	preempt_disable();
-
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
-		WARN_ON(preempt_count() == 1);
-		arch_leave_lazy_cpu_mode();
-		arch_enter_lazy_cpu_mode();
-	}
-
-	preempt_enable();
-}
-
 struct pv_info pv_info = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
@@ -402,10 +400,8 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.set_iopl_mask = native_set_iopl_mask,
 	.io_delay = native_io_delay,
 
-	.lazy_mode = {
-		.enter = paravirt_nop,
-		.leave = paravirt_nop,
-	},
+	.start_context_switch = paravirt_nop,
+	.end_context_switch = paravirt_nop,
 };
 
 struct pv_apic_ops pv_apic_ops = {
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 14014d7..d766c76 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -407,7 +407,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_leave_lazy_cpu_mode();
+	arch_end_context_switch(next_p);
 
 	/* If the task has used fpu the last 5 timeslices, just do a full
 	 * restore of the math state immediately to avoid the trap; the
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index abb7e6a..e8a9aaf 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -428,7 +428,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_leave_lazy_cpu_mode();
+	arch_end_context_switch(next_p);
 
 	/*
 	 * Switch FS and GS.
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 95deb9f..b263423 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -462,22 +462,28 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
 }
 #endif
 
-static void vmi_enter_lazy_cpu(void)
+static void vmi_start_context_switch(struct task_struct *prev)
 {
-	paravirt_enter_lazy_cpu();
+	paravirt_start_context_switch(prev);
 	vmi_ops.set_lazy_mode(2);
 }
 
+static void vmi_end_context_switch(struct task_struct *next)
+{
+	vmi_ops.set_lazy_mode(0);
+	paravirt_end_context_switch(next);
+}
+
 static void vmi_enter_lazy_mmu(void)
 {
 	paravirt_enter_lazy_mmu();
 	vmi_ops.set_lazy_mode(1);
 }
 
-static void vmi_leave_lazy(void)
+static void vmi_leave_lazy_mmu(void)
 {
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
 	vmi_ops.set_lazy_mode(0);
+	paravirt_leave_lazy_mmu();
 }
 
 static inline int __init check_vmi_rom(struct vrom_header *rom)
@@ -711,14 +717,14 @@ static inline int __init activate_vmi(void)
 	para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
 	para_fill(pv_cpu_ops.io_delay, IODelay);
 
-	para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu,
+	para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch,
 		  set_lazy_mode, SetLazyMode);
-	para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy,
+	para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch,
 		  set_lazy_mode, SetLazyMode);
 
 	para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
 		  set_lazy_mode, SetLazyMode);
-	para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy,
+	para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu,
 		  set_lazy_mode, SetLazyMode);
 
 	/* user and kernel flush are just handled with different flags to FlushTLB */
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 90e44a1..25799f3 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -147,10 +147,16 @@ static void lazy_hcall(unsigned long call,
 
 /* When lazy mode is turned off reset the per-cpu lazy mode variable and then
  * issue the do-nothing hypercall to flush any stored calls. */
-static void lguest_leave_lazy_mode(void)
+static void lguest_leave_lazy_mmu_mode(void)
 {
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
 	hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
+	paravirt_leave_lazy_mmu();
+}
+
+static void lguest_end_context_switch(struct task_struct *next)
+{
+	hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
+	paravirt_end_context_switch(next);
 }
 
 /*G:033
@@ -1025,8 +1031,8 @@ __init void lguest_init(void)
 	pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
 	pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
 	pv_cpu_ops.wbinvd = lguest_wbinvd;
-	pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;
-	pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
+	pv_cpu_ops.start_context_switch = paravirt_start_context_switch;
+	pv_cpu_ops.end_context_switch = lguest_end_context_switch;
 
 	/* pagetable management */
 	pv_mmu_ops.write_cr3 = lguest_write_cr3;
@@ -1039,7 +1045,7 @@ __init void lguest_init(void)
 	pv_mmu_ops.read_cr2 = lguest_read_cr2;
 	pv_mmu_ops.read_cr3 = lguest_read_cr3;
 	pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
-	pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
+	pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	/* apic read/write intercepts */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f70b901..09e6ae4 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -227,12 +227,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 	if (!pmd_present(*pmd_k))
 		return NULL;
 
-	if (!pmd_present(*pmd)) {
+	if (!pmd_present(*pmd))
 		set_pmd(pmd, *pmd_k);
-		arch_flush_lazy_mmu_mode();
-	} else {
+	else
 		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
-	}
 
 	return pmd_k;
 }
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 522db5e..17d0103 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -87,7 +87,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	BUG_ON(!pte_none(*(kmap_pte-idx)));
 	set_pte(kmap_pte-idx, mk_pte(page, prot));
-	arch_flush_lazy_mmu_mode();
 
 	return (void *)vaddr;
 }
@@ -117,7 +116,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
 #endif
 	}
 
-	arch_flush_lazy_mmu_mode();
 	pagefault_enable();
 }
 
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 699c9b2..0c16a33 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -41,7 +41,6 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
 	idx = type + KM_TYPE_NR * smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
-	arch_flush_lazy_mmu_mode();
 
 	return (void *)vaddr;
 }
@@ -80,7 +79,6 @@ iounmap_atomic(void *kvaddr, enum km_type type)
 	if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
 		kpte_clear_flush(kmap_pte-idx, vaddr);
 
-	arch_flush_lazy_mmu_mode();
 	pagefault_enable();
 }
 EXPORT_SYMBOL_GPL(iounmap_atomic);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index b0e5adb..1224865 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -844,13 +844,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 
 	vm_unmap_aliases();
 
-	/*
-	 * If we're called with lazy mmu updates enabled, the
-	 * in-memory pte state may be stale.  Flush pending updates to
-	 * bring them up to date.
-	 */
-	arch_flush_lazy_mmu_mode();
-
 	cpa.vaddr = addr;
 	cpa.pages = pages;
 	cpa.numpages = numpages;
@@ -895,13 +888,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	} else
 		cpa_flush_all(cache);
 
-	/*
-	 * If we've been called with lazy mmu updates enabled, then
-	 * make sure that everything gets flushed out before we
-	 * return.
-	 */
-	arch_flush_lazy_mmu_mode();
-
 out:
 	return ret;
 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 82cd39a..70b355d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -203,10 +203,10 @@ static unsigned long xen_get_debugreg(int reg)
 	return HYPERVISOR_get_debugreg(reg);
 }
 
-void xen_leave_lazy(void)
+static void xen_end_context_switch(struct task_struct *next)
 {
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
 	xen_mc_flush();
+	paravirt_end_context_switch(next);
 }
 
 static unsigned long xen_store_tr(void)
@@ -817,10 +817,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	/* Xen takes care of %gs when switching to usermode for us */
 	.swapgs = paravirt_nop,
 
-	.lazy_mode = {
-		.enter = paravirt_enter_lazy_cpu,
-		.leave = xen_leave_lazy,
-	},
+	.start_context_switch = paravirt_start_context_switch,
+	.end_context_switch = xen_end_context_switch,
 };
 
 static const struct pv_apic_ops xen_apic_ops __initdata = {
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index db3802f..e194f72 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -419,10 +419,6 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval)
 {
-	/* updates to init_mm may be done without lock */
-	if (mm == &init_mm)
-		preempt_disable();
-
 	ADD_STATS(set_pte_at, 1);
 //	ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
 	ADD_STATS(set_pte_at_current, mm == current->mm);
@@ -443,9 +439,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 	}
 	xen_set_pte(ptep, pteval);
 
-out:
-	if (mm == &init_mm)
-		preempt_enable();
+out:	return;
 }
 
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
@@ -1119,10 +1113,8 @@ static void drop_other_mm_ref(void *info)
 
 	/* If this cpu still has a stale cr3 reference, then make sure
 	   it has been flushed. */
-	if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
+	if (percpu_read(xen_current_cr3) == __pa(mm->pgd))
 		load_cr3(swapper_pg_dir);
-		arch_flush_lazy_cpu_mode();
-	}
 }
 
 static void xen_drop_mm_ref(struct mm_struct *mm)
@@ -1135,7 +1127,6 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 			load_cr3(swapper_pg_dir);
 		else
 			leave_mm(smp_processor_id());
-		arch_flush_lazy_cpu_mode();
 	}
 
 	/* Get the "official" set of cpus referring to our pagetable. */
@@ -1819,6 +1810,11 @@ __init void xen_post_allocator_init(void)
 	xen_mark_init_mm_pinned();
 }
 
+static void xen_leave_lazy_mmu(void)
+{
+	xen_mc_flush();
+	paravirt_leave_lazy_mmu();
+}
 
 const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.pagetable_setup_start = xen_pagetable_setup_start,
@@ -1893,7 +1889,7 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 	.lazy_mode = {
 		.enter = paravirt_enter_lazy_mmu,
-		.leave = xen_leave_lazy,
+		.leave = xen_leave_lazy_mmu,
 	},
 
 	.set_fixmap = xen_set_fixmap,
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2f5ef26..f897cdf 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -30,7 +30,6 @@ pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
 void xen_ident_map_ISA(void);
 void xen_reserve_top(void);
 
-void xen_leave_lazy(void);
 void xen_post_allocator_init(void);
 
 char * __init xen_memory_setup(void);
diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h
index e16fdb1..0988704 100644
--- a/include/asm-frv/pgtable.h
+++ b/include/asm-frv/pgtable.h
@@ -73,8 +73,8 @@ static inline int pte_file(pte_t pte) { return 0; }
 #define pgtable_cache_init()		do {} while (0)
 #define arch_enter_lazy_mmu_mode()	do {} while (0)
 #define arch_leave_lazy_mmu_mode()	do {} while (0)
-#define arch_enter_lazy_cpu_mode()	do {} while (0)
-#define arch_leave_lazy_cpu_mode()	do {} while (0)
+
+#define arch_start_context_switch(prev)	do {} while (0)
 
 #else /* !CONFIG_MMU */
 /*****************************************************************************/
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 8e6d0ca..e410f60 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -280,17 +280,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 #endif
 
 /*
- * A facility to provide batching of the reload of page tables with the
- * actual context switch code for paravirtualized guests.  By convention,
- * only one of the lazy modes (CPU, MMU) should be active at any given
- * time, entry should never be nested, and entry and exits should always
- * be paired.  This is for sanity of maintaining and reasoning about the
- * kernel code.
+ * A facility to provide batching of the reload of page tables and
+ * other process state with the actual context switch code for
+ * paravirtualized guests.  By convention, only one of the batched
+ * update (lazy) modes (CPU, MMU) should be active at any given time,
+ * entry should never be nested, and entry and exits should always be
+ * paired.  This is for sanity of maintaining and reasoning about the
+ * kernel code.  In this case, the exit (end of the context switch) is
+ * in architecture-specific code, and so doesn't need a generic
+ * definition.
  */
-#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE
-#define arch_enter_lazy_cpu_mode()	do {} while (0)
-#define arch_leave_lazy_cpu_mode()	do {} while (0)
-#define arch_flush_lazy_cpu_mode()	do {} while (0)
+#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
+#define arch_start_context_switch(prev)	do {} while (0)
 #endif
 
 #ifndef __HAVE_PFNMAP_TRACKING
diff --git a/kernel/sched.c b/kernel/sched.c
index 3e827b8..77b43e3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2804,7 +2804,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 * combine the page table reload and the switch backend into
 	 * one hypercall.
 	 */
-	arch_enter_lazy_cpu_mode();
+	arch_start_context_switch(prev);
 
 	if (unlikely(!mm)) {
 		next->active_mm = oldmm;



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [GIT PULL] core Xen updates for 2.6.30
       [not found]         ` <49C45238.7050007@zytor.com>
  2009-03-23 17:38           ` [GIT PULL] x86/paravirt: allow preemption while doing lazy mmu update Jeremy Fitzhardinge
@ 2009-03-23 17:45           ` Jeremy Fitzhardinge
  2009-03-23 17:55           ` [GIT PULL] xen domU control interfaces Jeremy Fitzhardinge
                             ` (27 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 17:45 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ingo Molnar, the arch/x86 maintainers, Xen-devel,
	Linux Kernel Mailing List

This series updates core domU Xen support.

Changes since last posting:

    * rebased to newer tip/master
    * addressed hpa's review comments
    * little cleanups

Thanks,
    J

The following changes since commit 8d336ed83a87149bcae6b872fff1aed8be268342:
  Jeremy Fitzhardinge (1):
        x86/paravirt: use percpu_ rather than __get_cpu_var

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push2/xen/core

Alex Nixon (1):
      Xen: Add virt_to_pfn helper function

Hannes Eder (1):
      NULL noise: arch/x86/xen/smp.c

Jeremy Fitzhardinge (15):
      xen: disable preempt for leave_lazy_mmu
      xen: separate p2m allocation from setting
      xen: dynamically allocate p2m tables
      xen: split construction of p2m mfn tables from registration
      xen: clean up xen_load_gdt
      xen: make xen_load_gdt simpler
      xen: remove xen_load_gdt debug
      xen: reserve i386 Xen pagetables
      xen: mask XSAVE from cpuid
      xen: add FIX_TEXT_POKE to fixmap
      x86-64: remove PGE from must-have feature list
      x86-64: non-paravirt systems always has PSE and PGE
      xen/mmu: some early pagetable cleanups
      Revert "x86: create a non-zero sized bm_pte only when needed"
      xen/mmu: weaken flush_tlb_other test

 arch/x86/include/asm/required-features.h |    8 ++-
 arch/x86/include/asm/xen/page.h          |    3 +-
 arch/x86/mm/ioremap.c                    |   19 +---
 arch/x86/xen/enlighten.c                 |   76 +++++++++++---
 arch/x86/xen/mmu.c                       |  160 ++++++++++++++++++++++--------
 arch/x86/xen/mmu.h                       |    3 +
 arch/x86/xen/smp.c                       |    4 +-
 arch/x86/xen/xen-ops.h                   |    2 -
 8 files changed, 196 insertions(+), 79 deletions(-)

diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index d5cd6c5..64cf2d2 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -48,9 +48,15 @@
 #endif
 
 #ifdef CONFIG_X86_64
+#ifdef CONFIG_PARAVIRT
+/* Paravirtualized systems may not have PSE or PGE available */
 #define NEED_PSE	0
+#define NEED_PGE	0
+#else
+#define NEED_PSE	(1<<(X86_FEATURE_PSE) & 31)
+#define NEED_PGE	(1<<(X86_FEATURE_PGE) & 31)
+#endif
 #define NEED_MSR	(1<<(X86_FEATURE_MSR & 31))
-#define NEED_PGE	(1<<(X86_FEATURE_PGE & 31))
 #define NEED_FXSR	(1<<(X86_FEATURE_FXSR & 31))
 #define NEED_XMM	(1<<(X86_FEATURE_XMM & 31))
 #define NEED_XMM2	(1<<(X86_FEATURE_XMM2 & 31))
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 1a918dd..018a0a4 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -124,7 +124,8 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
 
 /* VIRT <-> MACHINE conversion */
 #define virt_to_machine(v)	(phys_to_machine(XPADDR(__pa(v))))
-#define virt_to_mfn(v)		(pfn_to_mfn(PFN_DOWN(__pa(v))))
+#define virt_to_pfn(v)          (PFN_DOWN(__pa(v)))
+#define virt_to_mfn(v)		(pfn_to_mfn(virt_to_pfn(v)))
 #define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
 
 static inline unsigned long pte_mfn(pte_t pte)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 55e127f..83ed74a 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -487,12 +487,7 @@ static int __init early_ioremap_debug_setup(char *str)
 early_param("early_ioremap_debug", early_ioremap_debug_setup);
 
 static __initdata int after_paging_init;
-#define __FIXADDR_TOP (-PAGE_SIZE)
-static pte_t bm_pte[(__fix_to_virt(FIX_DBGP_BASE)
-		     ^ __fix_to_virt(FIX_BTMAP_BEGIN)) >> PMD_SHIFT
-		    ? PAGE_SIZE / sizeof(pte_t) : 0] __page_aligned_bss;
-#undef __FIXADDR_TOP
-static __initdata pte_t *bm_ptep;
+static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
 
 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
 {
@@ -507,8 +502,6 @@ static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
 
 static inline pte_t * __init early_ioremap_pte(unsigned long addr)
 {
-	if (!sizeof(bm_pte))
-		return &bm_ptep[pte_index(addr)];
 	return &bm_pte[pte_index(addr)];
 }
 
@@ -526,14 +519,8 @@ void __init early_ioremap_init(void)
 		slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
 
 	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
-	if (sizeof(bm_pte)) {
-		memset(bm_pte, 0, sizeof(bm_pte));
-		pmd_populate_kernel(&init_mm, pmd, bm_pte);
-	} else {
-		bm_ptep = pte_offset_kernel(pmd, 0);
-		if (early_ioremap_debug)
-			printk(KERN_INFO "bm_ptep=%p\n", bm_ptep);
-	}
+	memset(bm_pte, 0, sizeof(bm_pte));
+	pmd_populate_kernel(&init_mm, pmd, bm_pte);
 
 	/*
 	 * The boot-ioremap range spans multiple pmds, for which
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 70b355d..da33e0c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -168,21 +168,23 @@ static void __init xen_banner(void)
 	       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
 }
 
+static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
+static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
+
 static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 		      unsigned int *cx, unsigned int *dx)
 {
+	unsigned maskecx = ~0;
 	unsigned maskedx = ~0;
 
 	/*
 	 * Mask out inconvenient features, to try and disable as many
 	 * unsupported kernel subsystems as possible.
 	 */
-	if (*ax == 1)
-		maskedx = ~((1 << X86_FEATURE_APIC) |  /* disable APIC */
-			    (1 << X86_FEATURE_ACPI) |  /* disable ACPI */
-			    (1 << X86_FEATURE_MCE)  |  /* disable MCE */
-			    (1 << X86_FEATURE_MCA)  |  /* disable MCA */
-			    (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+	if (*ax == 1) {
+		maskecx = cpuid_leaf1_ecx_mask;
+		maskedx = cpuid_leaf1_edx_mask;
+	}
 
 	asm(XEN_EMULATE_PREFIX "cpuid"
 		: "=a" (*ax),
@@ -190,9 +192,43 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 		  "=c" (*cx),
 		  "=d" (*dx)
 		: "0" (*ax), "2" (*cx));
+
+	*cx &= maskecx;
 	*dx &= maskedx;
 }
 
+static __init void xen_init_cpuid_mask(void)
+{
+	unsigned int ax, bx, cx, dx;
+
+	cpuid_leaf1_edx_mask =
+		~((1 << X86_FEATURE_MCE)  |  /* disable MCE */
+		  (1 << X86_FEATURE_MCA)  |  /* disable MCA */
+		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+
+	if (!xen_initial_domain())
+		cpuid_leaf1_edx_mask &=
+			~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
+			  (1 << X86_FEATURE_ACPI));  /* disable ACPI */
+
+	ax = 1;
+	xen_cpuid(&ax, &bx, &cx, &dx);
+
+	/* cpuid claims we support xsave; try enabling it to see what happens */
+	if (cx & (1 << (X86_FEATURE_XSAVE % 32))) {
+		unsigned long cr4;
+
+		set_in_cr4(X86_CR4_OSXSAVE);
+		
+		cr4 = read_cr4();
+
+		if ((cr4 & X86_CR4_OSXSAVE) == 0)
+			cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32));
+
+		clear_in_cr4(X86_CR4_OSXSAVE);
+	}
+}
+
 static void xen_set_debugreg(int reg, unsigned long val)
 {
 	HYPERVISOR_set_debugreg(reg, val);
@@ -284,12 +320,11 @@ static void xen_set_ldt(const void *addr, unsigned entries)
 
 static void xen_load_gdt(const struct desc_ptr *dtr)
 {
-	unsigned long *frames;
 	unsigned long va = dtr->address;
 	unsigned int size = dtr->size + 1;
 	unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+	unsigned long frames[pages];
 	int f;
-	struct multicall_space mcs;
 
 	/* A GDT can be up to 64k in size, which corresponds to 8192
 	   8-byte entries, or 16 4k pages.. */
@@ -297,19 +332,26 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
 	BUG_ON(size > 65536);
 	BUG_ON(va & ~PAGE_MASK);
 
-	mcs = xen_mc_entry(sizeof(*frames) * pages);
-	frames = mcs.args;
-
 	for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
-		frames[f] = arbitrary_virt_to_mfn((void *)va);
+		int level;
+		pte_t *ptep = lookup_address(va, &level);
+		unsigned long pfn, mfn;
+		void *virt;
+
+		BUG_ON(ptep == NULL);
+
+		pfn = pte_pfn(*ptep);
+		mfn = pfn_to_mfn(pfn);
+		virt = __va(PFN_PHYS(pfn));
+
+		frames[f] = mfn;
 
 		make_lowmem_page_readonly((void *)va);
-		make_lowmem_page_readonly(mfn_to_virt(frames[f]));
+		make_lowmem_page_readonly(virt);
 	}
 
-	MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct));
-
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
+	if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
+		BUG();
 }
 
 static void load_TLS_descriptor(struct thread_struct *t,
@@ -895,6 +937,8 @@ asmlinkage void __init xen_start_kernel(void)
 
 	xen_init_irq_ops();
 
+	xen_init_cpuid_mask();
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	/*
 	 * set up the basic apic ops.
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index e194f72..aa16ef4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -159,18 +159,14 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3);	 /* actual vcpu cr3 */
 #define TOP_ENTRIES		(MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
 
 /* Placeholder for holes in the address space */
-static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data =
-		{ [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
+static unsigned long *p2m_missing;
 
  /* Array of pointers to pages containing p2m entries */
-static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data =
-		{ [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
+static unsigned long **p2m_top;
 
 /* Arrays of p2m arrays expressed in mfns used for save/restore */
-static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss;
-
-static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
-	__page_aligned_bss;
+static unsigned long *p2m_top_mfn;
+static unsigned long *p2m_top_mfn_list;
 
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
@@ -183,22 +179,35 @@ static inline unsigned p2m_index(unsigned long pfn)
 	return pfn % P2M_ENTRIES_PER_PAGE;
 }
 
+#define SIZE_TOP_MFN sizeof(*p2m_top_mfn) * TOP_ENTRIES
+#define SIZE_TOP_MFN_LIST sizeof(*p2m_top_mfn_list) *			\
+	(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)
+
+RESERVE_BRK(xen_top_mfn, SIZE_TOP_MFN);
+RESERVE_BRK(xen_top_mfn_list, SIZE_TOP_MFN_LIST);
+
 /* Build the parallel p2m_top_mfn structures */
-void xen_setup_mfn_list_list(void)
+static void __init xen_build_mfn_list_list(void)
 {
 	unsigned pfn, idx;
 
+	p2m_top_mfn = extend_brk(SIZE_TOP_MFN, PAGE_SIZE);
+	p2m_top_mfn_list = extend_brk(SIZE_TOP_MFN_LIST, PAGE_SIZE);
+
 	for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
 		unsigned topidx = p2m_top_index(pfn);
 
 		p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
 	}
 
-	for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
+	for (idx = 0; idx < (TOP_ENTRIES / P2M_ENTRIES_PER_PAGE); idx++) {
 		unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
 		p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
 	}
+}
 
+void xen_setup_mfn_list_list(void)
+{
 	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
 
 	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
@@ -206,18 +215,34 @@ void xen_setup_mfn_list_list(void)
 	HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
 }
 
+#define SIZE_P2M_MISSING	sizeof(*p2m_missing) * P2M_ENTRIES_PER_PAGE
+#define SIZE_P2M_TOP		sizeof(*p2m_top) * TOP_ENTRIES
+RESERVE_BRK(xen_p2m_missing, SIZE_P2M_MISSING);
+RESERVE_BRK(xen_p2m_top, SIZE_P2M_TOP);
+
 /* Set up p2m_top to point to the domain-builder provided p2m pages */
 void __init xen_build_dynamic_phys_to_machine(void)
 {
 	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
 	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
 	unsigned pfn;
+	unsigned i;
+
+	p2m_missing = extend_brk(SIZE_P2M_MISSING, PAGE_SIZE);
+	for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
+		p2m_missing[i] = ~0ul;
+
+	p2m_top = extend_brk(SIZE_P2M_TOP, PAGE_SIZE);
+	for(i = 0; i < TOP_ENTRIES; i++)
+		p2m_top[i] = p2m_missing;
 
 	for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
 		unsigned topidx = p2m_top_index(pfn);
 
 		p2m_top[topidx] = &mfn_list[pfn];
 	}
+
+	xen_build_mfn_list_list();
 }
 
 unsigned long get_phys_to_machine(unsigned long pfn)
@@ -233,47 +258,74 @@ unsigned long get_phys_to_machine(unsigned long pfn)
 }
 EXPORT_SYMBOL_GPL(get_phys_to_machine);
 
-static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
+/* install a  new p2m_top page */
+bool install_p2mtop_page(unsigned long pfn, unsigned long *p)
 {
-	unsigned long *p;
+	unsigned topidx = p2m_top_index(pfn);
+	unsigned long **pfnp, *mfnp;
 	unsigned i;
 
-	p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
-	BUG_ON(p == NULL);
+	pfnp = &p2m_top[topidx];
+	mfnp = &p2m_top_mfn[topidx];
 
 	for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
 		p[i] = INVALID_P2M_ENTRY;
 
-	if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
-		free_page((unsigned long)p);
-	else
+	if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) {
 		*mfnp = virt_to_mfn(p);
+		return true;
+	}
+
+	return false;
 }
 
-void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+static void alloc_p2m(unsigned long pfn)
 {
-	unsigned topidx, idx;
+	unsigned long *p;
 
-	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
-		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
-		return;
-	}
+	p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
+	BUG_ON(p == NULL);
+
+	if (!install_p2mtop_page(pfn, p))
+		free_page((unsigned long)p);
+}
+
+/* Try to install p2m mapping; fail if intermediate bits missing */
+bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	unsigned topidx, idx;
 
 	if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
 		BUG_ON(mfn != INVALID_P2M_ENTRY);
-		return;
+		return true;
 	}
 
 	topidx = p2m_top_index(pfn);
 	if (p2m_top[topidx] == p2m_missing) {
-		/* no need to allocate a page to store an invalid entry */
 		if (mfn == INVALID_P2M_ENTRY)
-			return;
-		alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
+			return true;
+		return false;
 	}
 
 	idx = p2m_index(pfn);
 	p2m_top[topidx][idx] = mfn;
+
+	return true;
+}
+
+void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+		return;
+	}
+
+	if (unlikely(!__set_phys_to_machine(pfn, mfn)))  {
+		alloc_p2m(pfn);
+
+		if (!__set_phys_to_machine(pfn, mfn))
+			BUG();
+	}
 }
 
 unsigned long arbitrary_virt_to_mfn(void *vaddr)
@@ -981,7 +1033,7 @@ static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
 	return 0;
 }
 
-void __init xen_mark_init_mm_pinned(void)
+static void __init xen_mark_init_mm_pinned(void)
 {
 	xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
 }
@@ -1261,8 +1313,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 	} *args;
 	struct multicall_space mcs;
 
-	BUG_ON(cpumask_empty(cpus));
-	BUG_ON(!mm);
+	if (cpumask_empty(cpus))
+		return;		/* nothing to do */
 
 	mcs = xen_mc_entry(sizeof(*args));
 	args = mcs.args;
@@ -1429,6 +1481,15 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
 }
 #endif
 
+static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+{
+	struct mmuext_op op;
+	op.cmd = cmd;
+	op.arg1.mfn = pfn_to_mfn(pfn);
+	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
+		BUG();
+}
+
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
 static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
@@ -1437,22 +1498,29 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
 	BUG_ON(mem_map);	/* should only be used early */
 #endif
 	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
+	pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+}
+
+/* Used for pmd and pud */
+static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
+{
+#ifdef CONFIG_FLATMEM
+	BUG_ON(mem_map);	/* should only be used early */
+#endif
+	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
 }
 
 /* Early release_pte assumes that all pts are pinned, since there's
    only init_mm and anything attached to that is pinned. */
-static void xen_release_pte_init(unsigned long pfn)
+static __init void xen_release_pte_init(unsigned long pfn)
 {
+	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
 
-static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+static __init void xen_release_pmd_init(unsigned long pfn)
 {
-	struct mmuext_op op;
-	op.cmd = cmd;
-	op.arg1.mfn = pfn_to_mfn(pfn);
-	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
-		BUG();
+	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
 
 /* This needs to make sure the new pte page is pinned iff its being
@@ -1737,6 +1805,11 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
 
 	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
 
+	reserve_early(__pa(xen_start_info->pt_base),
+		      __pa(xen_start_info->pt_base +
+			   xen_start_info->nr_pt_frames * PAGE_SIZE),
+		      "XEN PAGETABLES");
+
 	return swapper_pg_dir;
 }
 #endif	/* CONFIG_X86_64 */
@@ -1764,6 +1837,9 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
 #ifdef CONFIG_X86_LOCAL_APIC
 	case FIX_APIC_BASE:	/* maps dummy local APIC */
 #endif
+	case FIX_TEXT_POKE0:
+	case FIX_TEXT_POKE1:
+		/* All local page mappings */
 		pte = pfn_pte(phys, prot);
 		break;
 
@@ -1812,8 +1888,10 @@ __init void xen_post_allocator_init(void)
 
 static void xen_leave_lazy_mmu(void)
 {
+	preempt_disable();
 	xen_mc_flush();
 	paravirt_leave_lazy_mmu();
+	preempt_enable();
 }
 
 const struct pv_mmu_ops xen_mmu_ops __initdata = {
@@ -1839,9 +1917,9 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 	.alloc_pte = xen_alloc_pte_init,
 	.release_pte = xen_release_pte_init,
-	.alloc_pmd = xen_alloc_pte_init,
+	.alloc_pmd = xen_alloc_pmd_init,
 	.alloc_pmd_clone = paravirt_nop,
-	.release_pmd = xen_release_pte_init,
+	.release_pmd = xen_release_pmd_init,
 
 #ifdef CONFIG_HIGHPTE
 	.kmap_atomic_pte = xen_kmap_atomic_pte,
@@ -1879,8 +1957,8 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.make_pud = PV_CALLEE_SAVE(xen_make_pud),
 	.set_pgd = xen_set_pgd_hyper,
 
-	.alloc_pud = xen_alloc_pte_init,
-	.release_pud = xen_release_pte_init,
+	.alloc_pud = xen_alloc_pmd_init,
+	.release_pud = xen_release_pmd_init,
 #endif	/* PAGETABLE_LEVELS == 4 */
 
 	.activate_mm = xen_activate_mm,
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 24d1b44..da73026 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -11,6 +11,9 @@ enum pt_level {
 };
 
 
+bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+bool install_p2mtop_page(unsigned long pfn, unsigned long *p);
+
 void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 585a6e3..429834e 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -317,7 +317,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
 	BUG_ON(rc);
 
 	while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
-		HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+		HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
 		barrier();
 	}
 
@@ -422,7 +422,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
 	/* Make sure other vcpus get a chance to run if they need to. */
 	for_each_cpu(cpu, mask) {
 		if (xen_vcpu_stolen(cpu)) {
-			HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+			HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
 			break;
 		}
 	}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f897cdf..5c50a10 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -56,8 +56,6 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
 
 bool xen_vcpu_stolen(int vcpu);
 
-void xen_mark_init_mm_pinned(void);
-
 void xen_setup_vcpu_info_placement(void);
 
 #ifdef CONFIG_SMP



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [GIT PULL] xen domU control interfaces
       [not found]         ` <49C45238.7050007@zytor.com>
  2009-03-23 17:38           ` [GIT PULL] x86/paravirt: allow preemption while doing lazy mmu update Jeremy Fitzhardinge
  2009-03-23 17:45           ` [GIT PULL] core Xen updates for 2.6.30 Jeremy Fitzhardinge
@ 2009-03-23 17:55           ` Jeremy Fitzhardinge
  2009-03-23 17:55           ` [PATCH 01/11] xen: add irq_from_evtchn Jeremy Fitzhardinge
                             ` (26 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 17:55 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Xen-devel


This series adds some usermode interfaces for interfacing with Xen from
an unprivileged domain:
	- Xen support for /sys/hypervisor
	- /dev/evtchn, to allow usermode to block on and send events via
	  event channels
	- updates to xenfs (/proc/xen)
	- updates to the xenbus interface

Thanks,
	J

The following changes since commit 627b11ca44b334e201c35bf1a1582dc10efaf4a8:
  Jeremy Fitzhardinge (1):
        xen/mmu: weaken flush_tlb_other test

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push2/xen/control

Ian Campbell (6):
      xen: add irq_from_evtchn
      xen: add /dev/xen/evtchn driver
      xen: export ioctl headers to userspace
      xen: drop kexec bits from /sys/hypervisor since kexec isn't implemented yet
      xen: remove suspend_cancel hook
      xen: use device model for suspending xenbus devices

Jeremy Fitzhardinge (9):
      xen/dev-evtchn: clean up locking in evtchn
      xen: add "capabilities" file
      xen: add /sys/hypervisor support
      xen/sys/hypervisor: change writable_pt to features
      xen/xenbus: export xenbus_dev_changed
      Merge branch 'push2/xen/xenbus' into push2/xen/control
      Merge branch 'push2/xen/dev-evtchn' into push2/xen/control
      Merge branch 'push2/xen/sys-hypervisor' into push2/xen/control
      Merge branch 'push2/xen/xenfs' into push2/xen/control

 drivers/xen/Kconfig               |   20 ++
 drivers/xen/Makefile              |    4 +-
 drivers/xen/events.c              |    6 +
 drivers/xen/evtchn.c              |  507 +++++++++++++++++++++++++++++++++++++
 drivers/xen/manage.c              |    9 +-
 drivers/xen/sys-hypervisor.c      |  445 ++++++++++++++++++++++++++++++++
 drivers/xen/xenbus/xenbus_probe.c |   61 +----
 drivers/xen/xenbus/xenbus_xs.c    |    2 +
 drivers/xen/xenfs/super.c         |   19 ++-
 include/Kbuild                    |    1 +
 include/xen/Kbuild                |    1 +
 include/xen/events.h              |    3 +
 include/xen/evtchn.h              |   88 +++++++
 include/xen/interface/version.h   |    3 +
 include/xen/xenbus.h              |    3 +-
 15 files changed, 1112 insertions(+), 60 deletions(-)
 create mode 100644 drivers/xen/evtchn.c
 create mode 100644 drivers/xen/sys-hypervisor.c
 create mode 100644 include/xen/Kbuild
 create mode 100644 include/xen/evtchn.h


^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH 01/11] xen: add irq_from_evtchn
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (2 preceding siblings ...)
  2009-03-23 17:55           ` [GIT PULL] xen domU control interfaces Jeremy Fitzhardinge
@ 2009-03-23 17:55           ` Jeremy Fitzhardinge
  2009-03-23 17:55           ` [PATCH 02/11] xen: add /dev/xen/evtchn driver Jeremy Fitzhardinge
                             ` (25 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 17:55 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Xen-devel,
	Ian Campbell, Jeremy Fitzhardinge

From: Ian Campbell <ian.campbell@citrix.com>

Given an evtchn, return the corresponding irq.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/events.c |    6 ++++++
 include/xen/events.h |    3 +++
 2 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 30963af..1cd2a0e 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -151,6 +151,12 @@ static unsigned int evtchn_from_irq(unsigned irq)
 	return info_for_irq(irq)->evtchn;
 }
 
+unsigned irq_from_evtchn(unsigned int evtchn)
+{
+	return evtchn_to_irq[evtchn];
+}
+EXPORT_SYMBOL_GPL(irq_from_evtchn);
+
 static enum ipi_vector ipi_from_irq(unsigned irq)
 {
 	struct irq_info *info = info_for_irq(irq);
diff --git a/include/xen/events.h b/include/xen/events.h
index 0d5f1ad..e68d59a 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -53,4 +53,7 @@ bool xen_test_irq_pending(int irq);
    irq will be disabled so it won't deliver an interrupt. */
 void xen_poll_irq(int irq);
 
+/* Determine the IRQ which is bound to an event channel */
+unsigned irq_from_evtchn(unsigned int evtchn);
+
 #endif	/* _XEN_EVENTS_H */
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 02/11] xen: add /dev/xen/evtchn driver
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (3 preceding siblings ...)
  2009-03-23 17:55           ` [PATCH 01/11] xen: add irq_from_evtchn Jeremy Fitzhardinge
@ 2009-03-23 17:55           ` Jeremy Fitzhardinge
  2009-03-23 17:55           ` [PATCH 03/11] xen: export ioctl headers to userspace Jeremy Fitzhardinge
                             ` (24 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 17:55 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Xen-devel,
	Ian Campbell, Jeremy Fitzhardinge

From: Ian Campbell <ian.campbell@citrix.com>

This driver is used by application which wish to receive notifications
from the hypervisor or other guests via Xen's event channel
mechanism. In particular it is used by the xenstore daemon in domain
0.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/Kconfig  |   10 +
 drivers/xen/Makefile |    3 +-
 drivers/xen/evtchn.c |  494 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/xen/evtchn.h |   88 +++++++++
 4 files changed, 594 insertions(+), 1 deletions(-)
 create mode 100644 drivers/xen/evtchn.c
 create mode 100644 include/xen/evtchn.h

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 526187c..1bbb910 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -18,6 +18,16 @@ config XEN_SCRUB_PAGES
 	  secure, but slightly less efficient.
 	  If in doubt, say yes.
 
+config XEN_DEV_EVTCHN
+	tristate "Xen /dev/xen/evtchn device"
+	depends on XEN
+	default y
+	help
+	  The evtchn driver allows a userspace process to triger event
+	  channels and to receive notification of an event channel
+	  firing.
+	  If in doubt, say yes.
+
 config XENFS
 	tristate "Xen filesystem"
 	depends on XEN
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index ff8accc..1567639 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -4,4 +4,5 @@ obj-y	+= xenbus/
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
-obj-$(CONFIG_XENFS)		+= xenfs/
\ No newline at end of file
+obj-$(CONFIG_XEN_DEV_EVTCHN)	+= evtchn.o
+obj-$(CONFIG_XENFS)		+= xenfs/
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
new file mode 100644
index 0000000..517b9ee
--- /dev/null
+++ b/drivers/xen/evtchn.c
@@ -0,0 +1,494 @@
+/******************************************************************************
+ * evtchn.c
+ *
+ * Driver for receiving and demuxing event-channel signals.
+ *
+ * Copyright (c) 2004-2005, K A Fraser
+ * Multi-process extensions Copyright (c) 2004, Steven Smith
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/miscdevice.h>
+#include <linux/major.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/poll.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/gfp.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+#include <xen/events.h>
+#include <xen/evtchn.h>
+#include <asm/xen/hypervisor.h>
+
+struct per_user_data {
+	/* Notification ring, accessed via /dev/xen/evtchn. */
+#define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
+#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+	evtchn_port_t *ring;
+	unsigned int ring_cons, ring_prod, ring_overflow;
+	struct mutex ring_cons_mutex; /* protect against concurrent readers */
+
+	/* Processes wait on this queue when ring is empty. */
+	wait_queue_head_t evtchn_wait;
+	struct fasync_struct *evtchn_async_queue;
+	const char *name;
+};
+
+/* Who's bound to each port? */
+static struct per_user_data *port_user[NR_EVENT_CHANNELS];
+static DEFINE_SPINLOCK(port_user_lock);
+
+irqreturn_t evtchn_interrupt(int irq, void *data)
+{
+	unsigned int port = (unsigned long)data;
+	struct per_user_data *u;
+
+	spin_lock(&port_user_lock);
+
+	u = port_user[port];
+
+	disable_irq_nosync(irq);
+
+	if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
+		u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
+		wmb(); /* Ensure ring contents visible */
+		if (u->ring_cons == u->ring_prod++) {
+			wake_up_interruptible(&u->evtchn_wait);
+			kill_fasync(&u->evtchn_async_queue,
+				    SIGIO, POLL_IN);
+		}
+	} else {
+		u->ring_overflow = 1;
+	}
+
+	spin_unlock(&port_user_lock);
+
+	return IRQ_HANDLED;
+}
+
+static ssize_t evtchn_read(struct file *file, char __user *buf,
+			   size_t count, loff_t *ppos)
+{
+	int rc;
+	unsigned int c, p, bytes1 = 0, bytes2 = 0;
+	struct per_user_data *u = file->private_data;
+
+	/* Whole number of ports. */
+	count &= ~(sizeof(evtchn_port_t)-1);
+
+	if (count == 0)
+		return 0;
+
+	if (count > PAGE_SIZE)
+		count = PAGE_SIZE;
+
+	for (;;) {
+		mutex_lock(&u->ring_cons_mutex);
+
+		rc = -EFBIG;
+		if (u->ring_overflow)
+			goto unlock_out;
+
+		c = u->ring_cons;
+		p = u->ring_prod;
+		if (c != p)
+			break;
+
+		mutex_unlock(&u->ring_cons_mutex);
+
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		rc = wait_event_interruptible(u->evtchn_wait,
+					      u->ring_cons != u->ring_prod);
+		if (rc)
+			return rc;
+	}
+
+	/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
+	if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
+		bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
+			sizeof(evtchn_port_t);
+		bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
+	} else {
+		bytes1 = (p - c) * sizeof(evtchn_port_t);
+		bytes2 = 0;
+	}
+
+	/* Truncate chunks according to caller's maximum byte count. */
+	if (bytes1 > count) {
+		bytes1 = count;
+		bytes2 = 0;
+	} else if ((bytes1 + bytes2) > count) {
+		bytes2 = count - bytes1;
+	}
+
+	rc = -EFAULT;
+	rmb(); /* Ensure that we see the port before we copy it. */
+	if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
+	    ((bytes2 != 0) &&
+	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
+		goto unlock_out;
+
+	u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
+	rc = bytes1 + bytes2;
+
+ unlock_out:
+	mutex_unlock(&u->ring_cons_mutex);
+	return rc;
+}
+
+static ssize_t evtchn_write(struct file *file, const char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	int rc, i;
+	evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
+	struct per_user_data *u = file->private_data;
+
+	if (kbuf == NULL)
+		return -ENOMEM;
+
+	/* Whole number of ports. */
+	count &= ~(sizeof(evtchn_port_t)-1);
+
+	rc = 0;
+	if (count == 0)
+		goto out;
+
+	if (count > PAGE_SIZE)
+		count = PAGE_SIZE;
+
+	rc = -EFAULT;
+	if (copy_from_user(kbuf, buf, count) != 0)
+		goto out;
+
+	spin_lock_irq(&port_user_lock);
+	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
+		if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
+			enable_irq(irq_from_evtchn(kbuf[i]));
+	spin_unlock_irq(&port_user_lock);
+
+	rc = count;
+
+ out:
+	free_page((unsigned long)kbuf);
+	return rc;
+}
+
+static int evtchn_bind_to_user(struct per_user_data *u, int port)
+{
+	int irq;
+	int rc = 0;
+
+	spin_lock_irq(&port_user_lock);
+
+	BUG_ON(port_user[port] != NULL);
+
+	irq = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
+					u->name, (void *)(unsigned long)port);
+	if (rc < 0)
+		goto fail;
+
+	port_user[port] = u;
+
+fail:
+	spin_unlock_irq(&port_user_lock);
+	return rc;
+}
+
+static void evtchn_unbind_from_user(struct per_user_data *u, int port)
+{
+	int irq = irq_from_evtchn(port);
+
+	unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+	port_user[port] = NULL;
+}
+
+static long evtchn_ioctl(struct file *file,
+			 unsigned int cmd, unsigned long arg)
+{
+	int rc;
+	struct per_user_data *u = file->private_data;
+	void __user *uarg = (void __user *) arg;
+
+	switch (cmd) {
+	case IOCTL_EVTCHN_BIND_VIRQ: {
+		struct ioctl_evtchn_bind_virq bind;
+		struct evtchn_bind_virq bind_virq;
+
+		rc = -EFAULT;
+		if (copy_from_user(&bind, uarg, sizeof(bind)))
+			break;
+
+		bind_virq.virq = bind.virq;
+		bind_virq.vcpu = 0;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+						 &bind_virq);
+		if (rc != 0)
+			break;
+
+		rc = evtchn_bind_to_user(u, bind_virq.port);
+		if (rc == 0)
+			rc = bind_virq.port;
+		break;
+	}
+
+	case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
+		struct ioctl_evtchn_bind_interdomain bind;
+		struct evtchn_bind_interdomain bind_interdomain;
+
+		rc = -EFAULT;
+		if (copy_from_user(&bind, uarg, sizeof(bind)))
+			break;
+
+		bind_interdomain.remote_dom  = bind.remote_domain;
+		bind_interdomain.remote_port = bind.remote_port;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+						 &bind_interdomain);
+		if (rc != 0)
+			break;
+
+		rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
+		if (rc == 0)
+			rc = bind_interdomain.local_port;
+		break;
+	}
+
+	case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
+		struct ioctl_evtchn_bind_unbound_port bind;
+		struct evtchn_alloc_unbound alloc_unbound;
+
+		rc = -EFAULT;
+		if (copy_from_user(&bind, uarg, sizeof(bind)))
+			break;
+
+		alloc_unbound.dom        = DOMID_SELF;
+		alloc_unbound.remote_dom = bind.remote_domain;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+						 &alloc_unbound);
+		if (rc != 0)
+			break;
+
+		rc = evtchn_bind_to_user(u, alloc_unbound.port);
+		if (rc == 0)
+			rc = alloc_unbound.port;
+		break;
+	}
+
+	case IOCTL_EVTCHN_UNBIND: {
+		struct ioctl_evtchn_unbind unbind;
+
+		rc = -EFAULT;
+		if (copy_from_user(&unbind, uarg, sizeof(unbind)))
+			break;
+
+		rc = -EINVAL;
+		if (unbind.port >= NR_EVENT_CHANNELS)
+			break;
+
+		spin_lock_irq(&port_user_lock);
+
+		rc = -ENOTCONN;
+		if (port_user[unbind.port] != u) {
+			spin_unlock_irq(&port_user_lock);
+			break;
+		}
+
+		evtchn_unbind_from_user(u, unbind.port);
+
+		spin_unlock_irq(&port_user_lock);
+
+		rc = 0;
+		break;
+	}
+
+	case IOCTL_EVTCHN_NOTIFY: {
+		struct ioctl_evtchn_notify notify;
+
+		rc = -EFAULT;
+		if (copy_from_user(&notify, uarg, sizeof(notify)))
+			break;
+
+		if (notify.port >= NR_EVENT_CHANNELS) {
+			rc = -EINVAL;
+		} else if (port_user[notify.port] != u) {
+			rc = -ENOTCONN;
+		} else {
+			notify_remote_via_evtchn(notify.port);
+			rc = 0;
+		}
+		break;
+	}
+
+	case IOCTL_EVTCHN_RESET: {
+		/* Initialise the ring to empty. Clear errors. */
+		mutex_lock(&u->ring_cons_mutex);
+		spin_lock_irq(&port_user_lock);
+		u->ring_cons = u->ring_prod = u->ring_overflow = 0;
+		spin_unlock_irq(&port_user_lock);
+		mutex_unlock(&u->ring_cons_mutex);
+		rc = 0;
+		break;
+	}
+
+	default:
+		rc = -ENOSYS;
+		break;
+	}
+
+	return rc;
+}
+
+static unsigned int evtchn_poll(struct file *file, poll_table *wait)
+{
+	unsigned int mask = POLLOUT | POLLWRNORM;
+	struct per_user_data *u = file->private_data;
+
+	poll_wait(file, &u->evtchn_wait, wait);
+	if (u->ring_cons != u->ring_prod)
+		mask |= POLLIN | POLLRDNORM;
+	if (u->ring_overflow)
+		mask = POLLERR;
+	return mask;
+}
+
+static int evtchn_fasync(int fd, struct file *filp, int on)
+{
+	struct per_user_data *u = filp->private_data;
+	return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
+}
+
+static int evtchn_open(struct inode *inode, struct file *filp)
+{
+	struct per_user_data *u;
+
+	u = kzalloc(sizeof(*u), GFP_KERNEL);
+	if (u == NULL)
+		return -ENOMEM;
+
+	u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
+	if (u->name == NULL) {
+		kfree(u);
+		return -ENOMEM;
+	}
+
+	init_waitqueue_head(&u->evtchn_wait);
+
+	u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
+	if (u->ring == NULL) {
+		kfree(u->name);
+		kfree(u);
+		return -ENOMEM;
+	}
+
+	mutex_init(&u->ring_cons_mutex);
+
+	filp->private_data = u;
+
+	return 0;
+}
+
+static int evtchn_release(struct inode *inode, struct file *filp)
+{
+	int i;
+	struct per_user_data *u = filp->private_data;
+
+	spin_lock_irq(&port_user_lock);
+
+	free_page((unsigned long)u->ring);
+
+	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+		if (port_user[i] != u)
+			continue;
+
+		evtchn_unbind_from_user(port_user[i], i);
+	}
+
+	spin_unlock_irq(&port_user_lock);
+
+	kfree(u->name);
+	kfree(u);
+
+	return 0;
+}
+
+static const struct file_operations evtchn_fops = {
+	.owner   = THIS_MODULE,
+	.read    = evtchn_read,
+	.write   = evtchn_write,
+	.unlocked_ioctl = evtchn_ioctl,
+	.poll    = evtchn_poll,
+	.fasync  = evtchn_fasync,
+	.open    = evtchn_open,
+	.release = evtchn_release,
+};
+
+static struct miscdevice evtchn_miscdev = {
+	.minor        = MISC_DYNAMIC_MINOR,
+	.name         = "evtchn",
+	.fops         = &evtchn_fops,
+};
+static int __init evtchn_init(void)
+{
+	int err;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	spin_lock_init(&port_user_lock);
+	memset(port_user, 0, sizeof(port_user));
+
+	/* Create '/dev/misc/evtchn'. */
+	err = misc_register(&evtchn_miscdev);
+	if (err != 0) {
+		printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
+		return err;
+	}
+
+	printk(KERN_INFO "Event-channel device installed.\n");
+
+	return 0;
+}
+
+static void __exit evtchn_cleanup(void)
+{
+	misc_deregister(&evtchn_miscdev);
+}
+
+module_init(evtchn_init);
+module_exit(evtchn_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/include/xen/evtchn.h b/include/xen/evtchn.h
new file mode 100644
index 0000000..14e833e
--- /dev/null
+++ b/include/xen/evtchn.h
@@ -0,0 +1,88 @@
+/******************************************************************************
+ * evtchn.h
+ *
+ * Interface to /dev/xen/evtchn.
+ *
+ * Copyright (c) 2003-2005, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __LINUX_PUBLIC_EVTCHN_H__
+#define __LINUX_PUBLIC_EVTCHN_H__
+
+/*
+ * Bind a fresh port to VIRQ @virq.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_VIRQ				\
+	_IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq))
+struct ioctl_evtchn_bind_virq {
+	unsigned int virq;
+};
+
+/*
+ * Bind a fresh port to remote <@remote_domain, @remote_port>.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_INTERDOMAIN			\
+	_IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain))
+struct ioctl_evtchn_bind_interdomain {
+	unsigned int remote_domain, remote_port;
+};
+
+/*
+ * Allocate a fresh port for binding to @remote_domain.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_UNBOUND_PORT			\
+	_IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port))
+struct ioctl_evtchn_bind_unbound_port {
+	unsigned int remote_domain;
+};
+
+/*
+ * Unbind previously allocated @port.
+ */
+#define IOCTL_EVTCHN_UNBIND				\
+	_IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind))
+struct ioctl_evtchn_unbind {
+	unsigned int port;
+};
+
+/*
+ * Unbind previously allocated @port.
+ */
+#define IOCTL_EVTCHN_NOTIFY				\
+	_IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify))
+struct ioctl_evtchn_notify {
+	unsigned int port;
+};
+
+/* Clear and reinitialise the event buffer. Clear error condition. */
+#define IOCTL_EVTCHN_RESET				\
+	_IOC(_IOC_NONE, 'E', 5, 0)
+
+#endif /* __LINUX_PUBLIC_EVTCHN_H__ */
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 03/11] xen: export ioctl headers to userspace
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (4 preceding siblings ...)
  2009-03-23 17:55           ` [PATCH 02/11] xen: add /dev/xen/evtchn driver Jeremy Fitzhardinge
@ 2009-03-23 17:55           ` Jeremy Fitzhardinge
  2009-03-23 17:55           ` [PATCH 04/11] xen/dev-evtchn: clean up locking in evtchn Jeremy Fitzhardinge
                             ` (23 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 17:55 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Xen-devel,
	Ian Campbell, Jeremy Fitzhardinge

From: Ian Campbell <ian.campbell@citrix.com>

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 include/Kbuild     |    1 +
 include/xen/Kbuild |    1 +
 2 files changed, 2 insertions(+), 0 deletions(-)
 create mode 100644 include/xen/Kbuild

diff --git a/include/Kbuild b/include/Kbuild
index d8c3e3c..fe36acc 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -8,3 +8,4 @@ header-y += mtd/
 header-y += rdma/
 header-y += video/
 header-y += drm/
+header-y += xen/
diff --git a/include/xen/Kbuild b/include/xen/Kbuild
new file mode 100644
index 0000000..4e65c16
--- /dev/null
+++ b/include/xen/Kbuild
@@ -0,0 +1 @@
+header-y += evtchn.h
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 04/11] xen/dev-evtchn: clean up locking in evtchn
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (5 preceding siblings ...)
  2009-03-23 17:55           ` [PATCH 03/11] xen: export ioctl headers to userspace Jeremy Fitzhardinge
@ 2009-03-23 17:55           ` Jeremy Fitzhardinge
  2009-03-23 17:55           ` [PATCH 05/11] xen: add "capabilities" file Jeremy Fitzhardinge
                             ` (22 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 17:55 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Xen-devel,
	Jeremy Fitzhardinge

From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

Define a new per_user_data mutex to serialize bind/unbind operations
to prevent them from racing with each other.  Fix error returns
and don't do a bind while holding a spinlock.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/evtchn.c |   37 +++++++++++++++++++++++++------------
 1 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 517b9ee..af03195 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -54,6 +54,8 @@
 #include <asm/xen/hypervisor.h>
 
 struct per_user_data {
+	struct mutex bind_mutex; /* serialize bind/unbind operations */
+
 	/* Notification ring, accessed via /dev/xen/evtchn. */
 #define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
 #define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
@@ -69,7 +71,7 @@ struct per_user_data {
 
 /* Who's bound to each port? */
 static struct per_user_data *port_user[NR_EVENT_CHANNELS];
-static DEFINE_SPINLOCK(port_user_lock);
+static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
 
 irqreturn_t evtchn_interrupt(int irq, void *data)
 {
@@ -210,22 +212,24 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
 
 static int evtchn_bind_to_user(struct per_user_data *u, int port)
 {
-	int irq;
 	int rc = 0;
 
-	spin_lock_irq(&port_user_lock);
-
+	/*
+	 * Ports are never reused, so every caller should pass in a
+	 * unique port.
+	 *
+	 * (Locking not necessary because we haven't registered the
+	 * interrupt handler yet, and our caller has already
+	 * serialized bind operations.)
+	 */
 	BUG_ON(port_user[port] != NULL);
-
-	irq = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
-					u->name, (void *)(unsigned long)port);
-	if (rc < 0)
-		goto fail;
-
 	port_user[port] = u;
 
-fail:
-	spin_unlock_irq(&port_user_lock);
+	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
+				       u->name, (void *)(unsigned long)port);
+	if (rc >= 0)
+		rc = 0;
+
 	return rc;
 }
 
@@ -234,6 +238,10 @@ static void evtchn_unbind_from_user(struct per_user_data *u, int port)
 	int irq = irq_from_evtchn(port);
 
 	unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+
+	/* make sure we unbind the irq handler before clearing the port */
+	barrier();
+
 	port_user[port] = NULL;
 }
 
@@ -244,6 +252,9 @@ static long evtchn_ioctl(struct file *file,
 	struct per_user_data *u = file->private_data;
 	void __user *uarg = (void __user *) arg;
 
+	/* Prevent bind from racing with unbind */
+	mutex_lock(&u->bind_mutex);
+
 	switch (cmd) {
 	case IOCTL_EVTCHN_BIND_VIRQ: {
 		struct ioctl_evtchn_bind_virq bind;
@@ -368,6 +379,7 @@ static long evtchn_ioctl(struct file *file,
 		rc = -ENOSYS;
 		break;
 	}
+	mutex_unlock(&u->bind_mutex);
 
 	return rc;
 }
@@ -414,6 +426,7 @@ static int evtchn_open(struct inode *inode, struct file *filp)
 		return -ENOMEM;
 	}
 
+	mutex_init(&u->bind_mutex);
 	mutex_init(&u->ring_cons_mutex);
 
 	filp->private_data = u;
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 05/11] xen: add "capabilities" file
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (6 preceding siblings ...)
  2009-03-23 17:55           ` [PATCH 04/11] xen/dev-evtchn: clean up locking in evtchn Jeremy Fitzhardinge
@ 2009-03-23 17:55           ` Jeremy Fitzhardinge
  2009-03-23 17:55           ` [PATCH 06/11] xen: add /sys/hypervisor support Jeremy Fitzhardinge
                             ` (21 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 17:55 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Xen-devel,
	Jeremy Fitzhardinge, Jeremy Fitzhardinge

The xenfs capabilities file allows usermode to determine what
capabilities the domain has.  The only one at present is "control_d"
in a privileged domain.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/xenfs/super.c |   19 ++++++++++++++++++-
 1 files changed, 18 insertions(+), 1 deletions(-)

diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 515741a..6559e0c 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -20,10 +20,27 @@
 MODULE_DESCRIPTION("Xen filesystem");
 MODULE_LICENSE("GPL");
 
+static ssize_t capabilities_read(struct file *file, char __user *buf,
+				 size_t size, loff_t *off)
+{
+	char *tmp = "";
+
+	if (xen_initial_domain())
+		tmp = "control_d\n";
+
+	return simple_read_from_buffer(buf, size, off, tmp, strlen(tmp));
+}
+
+static const struct file_operations capabilities_file_ops = {
+	.read = capabilities_read,
+};
+
 static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	static struct tree_descr xenfs_files[] = {
-		[2] = {"xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR},
+		[1] = {},
+		{ "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
+		{ "capabilities", &capabilities_file_ops, S_IRUGO },
 		{""},
 	};
 
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 06/11] xen: add /sys/hypervisor support
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (7 preceding siblings ...)
  2009-03-23 17:55           ` [PATCH 05/11] xen: add "capabilities" file Jeremy Fitzhardinge
@ 2009-03-23 17:55           ` Jeremy Fitzhardinge
  2009-03-23 17:55           ` [PATCH 07/11] xen/sys/hypervisor: change writable_pt to features Jeremy Fitzhardinge
                             ` (20 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 17:55 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Xen-devel,
	Jeremy Fitzhardinge

From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

Adds support for Xen info under /sys/hypervisor.  Taken from Novell 2.6.27
backport tree.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/Kconfig             |   10 +
 drivers/xen/Makefile            |    3 +-
 drivers/xen/sys-hypervisor.c    |  475 +++++++++++++++++++++++++++++++++++++++
 include/xen/interface/version.h |    3 +
 4 files changed, 490 insertions(+), 1 deletions(-)
 create mode 100644 drivers/xen/sys-hypervisor.c

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 526187c..88bca1c 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -41,3 +41,13 @@ config XEN_COMPAT_XENFS
          a xen platform.
          If in doubt, say yes.
 
+config XEN_SYS_HYPERVISOR
+       bool "Create xen entries under /sys/hypervisor"
+       depends on XEN && SYSFS
+       select SYS_HYPERVISOR
+       default y
+       help
+         Create entries under /sys/hypervisor describing the Xen
+	 hypervisor environment.  When running native or in another
+	 virtual environment, /sys/hypervisor will still be present,
+	 but will have no xen contents.
\ No newline at end of file
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index ff8accc..f3603a3 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -4,4 +4,5 @@ obj-y	+= xenbus/
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
-obj-$(CONFIG_XENFS)		+= xenfs/
\ No newline at end of file
+obj-$(CONFIG_XENFS)		+= xenfs/
+obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
new file mode 100644
index 0000000..cb29d1c
--- /dev/null
+++ b/drivers/xen/sys-hypervisor.c
@@ -0,0 +1,475 @@
+/*
+ *  copyright (c) 2006 IBM Corporation
+ *  Authored by: Mike D. Day <ncmike@us.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kobject.h>
+
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/xenbus.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/version.h>
+
+#define HYPERVISOR_ATTR_RO(_name) \
+static struct hyp_sysfs_attr  _name##_attr = __ATTR_RO(_name)
+
+#define HYPERVISOR_ATTR_RW(_name) \
+static struct hyp_sysfs_attr _name##_attr = \
+	__ATTR(_name, 0644, _name##_show, _name##_store)
+
+struct hyp_sysfs_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct hyp_sysfs_attr *, char *);
+	ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t);
+	void *hyp_attr_data;
+};
+
+static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	return sprintf(buffer, "xen\n");
+}
+
+HYPERVISOR_ATTR_RO(type);
+
+static int __init xen_sysfs_type_init(void)
+{
+	return sysfs_create_file(hypervisor_kobj, &type_attr.attr);
+}
+
+static void xen_sysfs_type_destroy(void)
+{
+	sysfs_remove_file(hypervisor_kobj, &type_attr.attr);
+}
+
+/* xen version attributes */
+static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int version = HYPERVISOR_xen_version(XENVER_version, NULL);
+	if (version)
+		return sprintf(buffer, "%d\n", version >> 16);
+	return -ENODEV;
+}
+
+HYPERVISOR_ATTR_RO(major);
+
+static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int version = HYPERVISOR_xen_version(XENVER_version, NULL);
+	if (version)
+		return sprintf(buffer, "%d\n", version & 0xff);
+	return -ENODEV;
+}
+
+HYPERVISOR_ATTR_RO(minor);
+
+static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	char *extra;
+
+	extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL);
+	if (extra) {
+		ret = HYPERVISOR_xen_version(XENVER_extraversion, extra);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", extra);
+		kfree(extra);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(extra);
+
+static struct attribute *version_attrs[] = {
+	&major_attr.attr,
+	&minor_attr.attr,
+	&extra_attr.attr,
+	NULL
+};
+
+static struct attribute_group version_group = {
+	.name = "version",
+	.attrs = version_attrs,
+};
+
+static int __init xen_sysfs_version_init(void)
+{
+	return sysfs_create_group(hypervisor_kobj, &version_group);
+}
+
+static void xen_sysfs_version_destroy(void)
+{
+	sysfs_remove_group(hypervisor_kobj, &version_group);
+}
+
+/* UUID */
+
+static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	char *vm, *val;
+	int ret;
+	extern int xenstored_ready;
+
+	if (!xenstored_ready)
+		return -EBUSY;
+
+	vm = xenbus_read(XBT_NIL, "vm", "", NULL);
+	if (IS_ERR(vm))
+		return PTR_ERR(vm);
+	val = xenbus_read(XBT_NIL, vm, "uuid", NULL);
+	kfree(vm);
+	if (IS_ERR(val))
+		return PTR_ERR(val);
+	ret = sprintf(buffer, "%s\n", val);
+	kfree(val);
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(uuid);
+
+static int __init xen_sysfs_uuid_init(void)
+{
+	return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr);
+}
+
+static void xen_sysfs_uuid_destroy(void)
+{
+	sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr);
+}
+
+/* xen compilation attributes */
+
+static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_compile_info *info;
+
+	info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
+	if (info) {
+		ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", info->compiler);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(compiler);
+
+static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_compile_info *info;
+
+	info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
+	if (info) {
+		ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", info->compile_by);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(compiled_by);
+
+static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_compile_info *info;
+
+	info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
+	if (info) {
+		ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", info->compile_date);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(compile_date);
+
+static struct attribute *xen_compile_attrs[] = {
+	&compiler_attr.attr,
+	&compiled_by_attr.attr,
+	&compile_date_attr.attr,
+	NULL
+};
+
+static struct attribute_group xen_compilation_group = {
+	.name = "compilation",
+	.attrs = xen_compile_attrs,
+};
+
+int __init static xen_compilation_init(void)
+{
+	return sysfs_create_group(hypervisor_kobj, &xen_compilation_group);
+}
+
+static void xen_compilation_destroy(void)
+{
+	sysfs_remove_group(hypervisor_kobj, &xen_compilation_group);
+}
+
+/* xen properties info */
+
+static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	char *caps;
+
+	caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL);
+	if (caps) {
+		ret = HYPERVISOR_xen_version(XENVER_capabilities, caps);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", caps);
+		kfree(caps);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(capabilities);
+
+static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	char *cset;
+
+	cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL);
+	if (cset) {
+		ret = HYPERVISOR_xen_version(XENVER_changeset, cset);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", cset);
+		kfree(cset);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(changeset);
+
+static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_platform_parameters *parms;
+
+	parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL);
+	if (parms) {
+		ret = HYPERVISOR_xen_version(XENVER_platform_parameters,
+					     parms);
+		if (!ret)
+			ret = sprintf(buffer, "%lx\n", parms->virt_start);
+		kfree(parms);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(virtual_start);
+
+static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret;
+
+	ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL);
+	if (ret > 0)
+		ret = sprintf(buffer, "%x\n", ret);
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(pagesize);
+
+/* eventually there will be several more features to export */
+static ssize_t xen_feature_show(int index, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_feature_info *info;
+
+	info = kmalloc(sizeof(struct xen_feature_info), GFP_KERNEL);
+	if (info) {
+		info->submap_idx = index;
+		ret = HYPERVISOR_xen_version(XENVER_get_features, info);
+		if (!ret)
+			ret = sprintf(buffer, "%d\n", info->submap);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+static ssize_t writable_pt_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	return xen_feature_show(XENFEAT_writable_page_tables, buffer);
+}
+
+HYPERVISOR_ATTR_RO(writable_pt);
+
+static struct attribute *xen_properties_attrs[] = {
+	&capabilities_attr.attr,
+	&changeset_attr.attr,
+	&virtual_start_attr.attr,
+	&pagesize_attr.attr,
+	&writable_pt_attr.attr,
+	NULL
+};
+
+static struct attribute_group xen_properties_group = {
+	.name = "properties",
+	.attrs = xen_properties_attrs,
+};
+
+static int __init xen_properties_init(void)
+{
+	return sysfs_create_group(hypervisor_kobj, &xen_properties_group);
+}
+
+static void xen_properties_destroy(void)
+{
+	sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
+}
+
+#ifdef CONFIG_KEXEC
+
+extern size_t vmcoreinfo_size_xen;
+extern unsigned long paddr_vmcoreinfo_xen;
+
+static ssize_t vmcoreinfo_show(struct hyp_sysfs_attr *attr, char *page)
+{
+	return sprintf(page, "%lx %zx\n",
+		paddr_vmcoreinfo_xen, vmcoreinfo_size_xen);
+}
+
+HYPERVISOR_ATTR_RO(vmcoreinfo);
+
+static int __init xen_sysfs_vmcoreinfo_init(void)
+{
+	return sysfs_create_file(hypervisor_kobj,
+				 &vmcoreinfo_attr.attr);
+}
+
+static void xen_sysfs_vmcoreinfo_destroy(void)
+{
+	sysfs_remove_file(hypervisor_kobj, &vmcoreinfo_attr.attr);
+}
+
+#endif
+
+static int __init hyper_sysfs_init(void)
+{
+	int ret;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	ret = xen_sysfs_type_init();
+	if (ret)
+		goto out;
+	ret = xen_sysfs_version_init();
+	if (ret)
+		goto version_out;
+	ret = xen_compilation_init();
+	if (ret)
+		goto comp_out;
+	ret = xen_sysfs_uuid_init();
+	if (ret)
+		goto uuid_out;
+	ret = xen_properties_init();
+	if (ret)
+		goto prop_out;
+#ifdef CONFIG_KEXEC
+	if (vmcoreinfo_size_xen != 0) {
+		ret = xen_sysfs_vmcoreinfo_init();
+		if (ret)
+			goto vmcoreinfo_out;
+	}
+#endif
+
+	goto out;
+
+#ifdef CONFIG_KEXEC
+vmcoreinfo_out:
+#endif
+	xen_properties_destroy();
+prop_out:
+	xen_sysfs_uuid_destroy();
+uuid_out:
+	xen_compilation_destroy();
+comp_out:
+	xen_sysfs_version_destroy();
+version_out:
+	xen_sysfs_type_destroy();
+out:
+	return ret;
+}
+
+static void __exit hyper_sysfs_exit(void)
+{
+#ifdef CONFIG_KEXEC
+	if (vmcoreinfo_size_xen != 0)
+		xen_sysfs_vmcoreinfo_destroy();
+#endif
+	xen_properties_destroy();
+	xen_compilation_destroy();
+	xen_sysfs_uuid_destroy();
+	xen_sysfs_version_destroy();
+	xen_sysfs_type_destroy();
+
+}
+module_init(hyper_sysfs_init);
+module_exit(hyper_sysfs_exit);
+
+static ssize_t hyp_sysfs_show(struct kobject *kobj,
+			      struct attribute *attr,
+			      char *buffer)
+{
+	struct hyp_sysfs_attr *hyp_attr;
+	hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
+	if (hyp_attr->show)
+		return hyp_attr->show(hyp_attr, buffer);
+	return 0;
+}
+
+static ssize_t hyp_sysfs_store(struct kobject *kobj,
+			       struct attribute *attr,
+			       const char *buffer,
+			       size_t len)
+{
+	struct hyp_sysfs_attr *hyp_attr;
+	hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
+	if (hyp_attr->store)
+		return hyp_attr->store(hyp_attr, buffer, len);
+	return 0;
+}
+
+static struct sysfs_ops hyp_sysfs_ops = {
+	.show = hyp_sysfs_show,
+	.store = hyp_sysfs_store,
+};
+
+static struct kobj_type hyp_sysfs_kobj_type = {
+	.sysfs_ops = &hyp_sysfs_ops,
+};
+
+static int __init hypervisor_subsys_init(void)
+{
+	if (!xen_domain())
+		return -ENODEV;
+
+	hypervisor_kobj->ktype = &hyp_sysfs_kobj_type;
+	return 0;
+}
+device_initcall(hypervisor_subsys_init);
diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h
index 453235e..e8b6519 100644
--- a/include/xen/interface/version.h
+++ b/include/xen/interface/version.h
@@ -57,4 +57,7 @@ struct xen_feature_info {
 /* Declares the features reported by XENVER_get_features. */
 #include "features.h"
 
+/* arg == NULL; returns host memory page size. */
+#define XENVER_pagesize 7
+
 #endif /* __XEN_PUBLIC_VERSION_H__ */
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 07/11] xen/sys/hypervisor: change writable_pt to features
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (8 preceding siblings ...)
  2009-03-23 17:55           ` [PATCH 06/11] xen: add /sys/hypervisor support Jeremy Fitzhardinge
@ 2009-03-23 17:55           ` Jeremy Fitzhardinge
  2009-03-23 17:55           ` [PATCH 08/11] xen: drop kexec bits from /sys/hypervisor since kexec isn't implemented yet Jeremy Fitzhardinge
                             ` (19 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 17:55 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Xen-devel,
	Jeremy Fitzhardinge

From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

/sys/hypervisor/properties/writable_pt was misnamed.  Rename to features,
expressed as a bit array in hex.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/sys-hypervisor.c |   41 ++++++++++++++++++++++++++---------------
 1 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
index cb29d1c..1267d6f 100644
--- a/drivers/xen/sys-hypervisor.c
+++ b/drivers/xen/sys-hypervisor.c
@@ -293,37 +293,48 @@ static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
 
 HYPERVISOR_ATTR_RO(pagesize);
 
-/* eventually there will be several more features to export */
 static ssize_t xen_feature_show(int index, char *buffer)
 {
-	int ret = -ENOMEM;
-	struct xen_feature_info *info;
+	ssize_t ret;
+	struct xen_feature_info info;
 
-	info = kmalloc(sizeof(struct xen_feature_info), GFP_KERNEL);
-	if (info) {
-		info->submap_idx = index;
-		ret = HYPERVISOR_xen_version(XENVER_get_features, info);
-		if (!ret)
-			ret = sprintf(buffer, "%d\n", info->submap);
-		kfree(info);
-	}
+	info.submap_idx = index;
+	ret = HYPERVISOR_xen_version(XENVER_get_features, &info);
+	if (!ret)
+		ret = sprintf(buffer, "%08x", info.submap);
 
 	return ret;
 }
 
-static ssize_t writable_pt_show(struct hyp_sysfs_attr *attr, char *buffer)
+static ssize_t features_show(struct hyp_sysfs_attr *attr, char *buffer)
 {
-	return xen_feature_show(XENFEAT_writable_page_tables, buffer);
+	ssize_t len;
+	int i;
+
+	len = 0;
+	for (i = XENFEAT_NR_SUBMAPS-1; i >= 0; i--) {
+		int ret = xen_feature_show(i, buffer + len);
+		if (ret < 0) {
+			if (len == 0)
+				len = ret;
+			break;
+		}
+		len += ret;
+	}
+	if (len > 0)
+		buffer[len++] = '\n';
+
+	return len;
 }
 
-HYPERVISOR_ATTR_RO(writable_pt);
+HYPERVISOR_ATTR_RO(features);
 
 static struct attribute *xen_properties_attrs[] = {
 	&capabilities_attr.attr,
 	&changeset_attr.attr,
 	&virtual_start_attr.attr,
 	&pagesize_attr.attr,
-	&writable_pt_attr.attr,
+	&features_attr.attr,
 	NULL
 };
 
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 08/11] xen: drop kexec bits from /sys/hypervisor since kexec isn't implemented yet
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (9 preceding siblings ...)
  2009-03-23 17:55           ` [PATCH 07/11] xen/sys/hypervisor: change writable_pt to features Jeremy Fitzhardinge
@ 2009-03-23 17:55           ` Jeremy Fitzhardinge
  2009-03-23 17:55           ` [PATCH 09/11] xen: remove suspend_cancel hook Jeremy Fitzhardinge
                             ` (18 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 17:55 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Xen-devel,
	Ian Campbell, Ian Campbell

From: Ian Campbell <Ian.Campbell@citrix.com>

I needed this to compile since there is no kexec yet in pvops kernel
  CC      drivers/xen/sys-hypervisor.o
drivers/xen/sys-hypervisor.c: In function 'hyper_sysfs_init':
drivers/xen/sys-hypervisor.c:405: error: 'vmcoreinfo_size_xen' undeclared (first use in this function)
drivers/xen/sys-hypervisor.c:405: error: (Each undeclared identifier is reported only once
drivers/xen/sys-hypervisor.c:405: error: for each function it appears in.)
drivers/xen/sys-hypervisor.c:406: error: implicit declaration of function 'xen_sysfs_vmcoreinfo_init'
drivers/xen/sys-hypervisor.c: In function 'hyper_sysfs_exit':
drivers/xen/sys-hypervisor.c:433: error: 'vmcoreinfo_size_xen' undeclared (first use in this function)
drivers/xen/sys-hypervisor.c:434: error: implicit declaration of function 'xen_sysfs_vmcoreinfo_destroy'

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
 drivers/xen/sys-hypervisor.c |   41 -----------------------------------------
 1 files changed, 0 insertions(+), 41 deletions(-)

diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
index 1267d6f..88a60e0 100644
--- a/drivers/xen/sys-hypervisor.c
+++ b/drivers/xen/sys-hypervisor.c
@@ -353,32 +353,6 @@ static void xen_properties_destroy(void)
 	sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
 }
 
-#ifdef CONFIG_KEXEC
-
-extern size_t vmcoreinfo_size_xen;
-extern unsigned long paddr_vmcoreinfo_xen;
-
-static ssize_t vmcoreinfo_show(struct hyp_sysfs_attr *attr, char *page)
-{
-	return sprintf(page, "%lx %zx\n",
-		paddr_vmcoreinfo_xen, vmcoreinfo_size_xen);
-}
-
-HYPERVISOR_ATTR_RO(vmcoreinfo);
-
-static int __init xen_sysfs_vmcoreinfo_init(void)
-{
-	return sysfs_create_file(hypervisor_kobj,
-				 &vmcoreinfo_attr.attr);
-}
-
-static void xen_sysfs_vmcoreinfo_destroy(void)
-{
-	sysfs_remove_file(hypervisor_kobj, &vmcoreinfo_attr.attr);
-}
-
-#endif
-
 static int __init hyper_sysfs_init(void)
 {
 	int ret;
@@ -401,20 +375,9 @@ static int __init hyper_sysfs_init(void)
 	ret = xen_properties_init();
 	if (ret)
 		goto prop_out;
-#ifdef CONFIG_KEXEC
-	if (vmcoreinfo_size_xen != 0) {
-		ret = xen_sysfs_vmcoreinfo_init();
-		if (ret)
-			goto vmcoreinfo_out;
-	}
-#endif
 
 	goto out;
 
-#ifdef CONFIG_KEXEC
-vmcoreinfo_out:
-#endif
-	xen_properties_destroy();
 prop_out:
 	xen_sysfs_uuid_destroy();
 uuid_out:
@@ -429,10 +392,6 @@ out:
 
 static void __exit hyper_sysfs_exit(void)
 {
-#ifdef CONFIG_KEXEC
-	if (vmcoreinfo_size_xen != 0)
-		xen_sysfs_vmcoreinfo_destroy();
-#endif
 	xen_properties_destroy();
 	xen_compilation_destroy();
 	xen_sysfs_uuid_destroy();
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 09/11] xen: remove suspend_cancel hook
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (10 preceding siblings ...)
  2009-03-23 17:55           ` [PATCH 08/11] xen: drop kexec bits from /sys/hypervisor since kexec isn't implemented yet Jeremy Fitzhardinge
@ 2009-03-23 17:55           ` Jeremy Fitzhardinge
  2009-03-23 17:55           ` [PATCH 10/11] xen: use device model for suspending xenbus devices Jeremy Fitzhardinge
                             ` (17 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 17:55 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Xen-devel,
	Ian Campbell, Jeremy Fitzhardinge

From: Ian Campbell <ian.campbell@citrix.com>

Remove suspend_cancel hook from xenbus_driver, in preparation for using
the device model for suspending.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/xenbus/xenbus_probe.c |   23 -----------------------
 include/xen/xenbus.h              |    1 -
 2 files changed, 0 insertions(+), 24 deletions(-)

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 773d1cf..bd20361 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -689,27 +689,6 @@ static int suspend_dev(struct device *dev, void *data)
 	return 0;
 }
 
-static int suspend_cancel_dev(struct device *dev, void *data)
-{
-	int err = 0;
-	struct xenbus_driver *drv;
-	struct xenbus_device *xdev;
-
-	DPRINTK("");
-
-	if (dev->driver == NULL)
-		return 0;
-	drv = to_xenbus_driver(dev->driver);
-	xdev = container_of(dev, struct xenbus_device, dev);
-	if (drv->suspend_cancel)
-		err = drv->suspend_cancel(xdev);
-	if (err)
-		printk(KERN_WARNING
-		       "xenbus: suspend_cancel %s failed: %i\n",
-		       dev_name(dev), err);
-	return 0;
-}
-
 static int resume_dev(struct device *dev, void *data)
 {
 	int err;
@@ -777,8 +756,6 @@ EXPORT_SYMBOL_GPL(xenbus_resume);
 void xenbus_suspend_cancel(void)
 {
 	xs_suspend_cancel();
-	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
-	xenbus_backend_resume(suspend_cancel_dev);
 }
 EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
 
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index f87f961..0836772 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -92,7 +92,6 @@ struct xenbus_driver {
 				 enum xenbus_state backend_state);
 	int (*remove)(struct xenbus_device *dev);
 	int (*suspend)(struct xenbus_device *dev);
-	int (*suspend_cancel)(struct xenbus_device *dev);
 	int (*resume)(struct xenbus_device *dev);
 	int (*uevent)(struct xenbus_device *, char **, int, char *, int);
 	struct device_driver driver;
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 10/11] xen: use device model for suspending xenbus devices
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (11 preceding siblings ...)
  2009-03-23 17:55           ` [PATCH 09/11] xen: remove suspend_cancel hook Jeremy Fitzhardinge
@ 2009-03-23 17:55           ` Jeremy Fitzhardinge
  2009-03-23 17:55           ` [PATCH 11/11] xen/xenbus: export xenbus_dev_changed Jeremy Fitzhardinge
                             ` (16 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 17:55 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Xen-devel,
	Ian Campbell, Jeremy Fitzhardinge

From: Ian Campbell <ian.campbell@citrix.com>

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/manage.c              |    9 ++++-----
 drivers/xen/xenbus/xenbus_probe.c |   37 +++++++++----------------------------
 drivers/xen/xenbus/xenbus_xs.c    |    2 ++
 include/xen/xenbus.h              |    2 +-
 4 files changed, 16 insertions(+), 34 deletions(-)

diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 3ccd348..0489ea2 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -104,9 +104,8 @@ static void do_suspend(void)
 		goto out;
 	}
 
-	printk("suspending xenbus...\n");
-	/* XXX use normal device tree? */
-	xenbus_suspend();
+	printk(KERN_DEBUG "suspending xenstore...\n");
+	xs_suspend();
 
 	err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
 	if (err) {
@@ -116,9 +115,9 @@ static void do_suspend(void)
 
 	if (!cancelled) {
 		xen_arch_resume();
-		xenbus_resume();
+		xs_resume();
 	} else
-		xenbus_suspend_cancel();
+		xs_suspend_cancel();
 
 	device_resume(PMSG_RESUME);
 
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index bd20361..4649213 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -71,6 +71,9 @@ static int xenbus_probe_frontend(const char *type, const char *name);
 
 static void xenbus_dev_shutdown(struct device *_dev);
 
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state);
+static int xenbus_dev_resume(struct device *dev);
+
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
 match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
@@ -188,6 +191,9 @@ static struct xen_bus_type xenbus_frontend = {
 		.remove    = xenbus_dev_remove,
 		.shutdown  = xenbus_dev_shutdown,
 		.dev_attrs = xenbus_dev_attrs,
+
+		.suspend   = xenbus_dev_suspend,
+		.resume    = xenbus_dev_resume,
 	},
 };
 
@@ -669,7 +675,7 @@ static struct xenbus_watch fe_watch = {
 	.callback = frontend_changed,
 };
 
-static int suspend_dev(struct device *dev, void *data)
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
 {
 	int err = 0;
 	struct xenbus_driver *drv;
@@ -682,14 +688,14 @@ static int suspend_dev(struct device *dev, void *data)
 	drv = to_xenbus_driver(dev->driver);
 	xdev = container_of(dev, struct xenbus_device, dev);
 	if (drv->suspend)
-		err = drv->suspend(xdev);
+		err = drv->suspend(xdev, state);
 	if (err)
 		printk(KERN_WARNING
 		       "xenbus: suspend %s failed: %i\n", dev_name(dev), err);
 	return 0;
 }
 
-static int resume_dev(struct device *dev, void *data)
+static int xenbus_dev_resume(struct device *dev)
 {
 	int err;
 	struct xenbus_driver *drv;
@@ -734,31 +740,6 @@ static int resume_dev(struct device *dev, void *data)
 	return 0;
 }
 
-void xenbus_suspend(void)
-{
-	DPRINTK("");
-
-	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
-	xenbus_backend_suspend(suspend_dev);
-	xs_suspend();
-}
-EXPORT_SYMBOL_GPL(xenbus_suspend);
-
-void xenbus_resume(void)
-{
-	xb_init_comms();
-	xs_resume();
-	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
-	xenbus_backend_resume(resume_dev);
-}
-EXPORT_SYMBOL_GPL(xenbus_resume);
-
-void xenbus_suspend_cancel(void)
-{
-	xs_suspend_cancel();
-}
-EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
-
 /* A flag to determine if xenstored is 'ready' (i.e. has started) */
 int xenstored_ready = 0;
 
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index e325eab..eab33f1 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -673,6 +673,8 @@ void xs_resume(void)
 	struct xenbus_watch *watch;
 	char token[sizeof(watch) * 2 + 1];
 
+	xb_init_comms();
+
 	mutex_unlock(&xs_state.response_mutex);
 	mutex_unlock(&xs_state.request_mutex);
 	up_write(&xs_state.transaction_mutex);
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 0836772..b9763ba 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -91,7 +91,7 @@ struct xenbus_driver {
 	void (*otherend_changed)(struct xenbus_device *dev,
 				 enum xenbus_state backend_state);
 	int (*remove)(struct xenbus_device *dev);
-	int (*suspend)(struct xenbus_device *dev);
+	int (*suspend)(struct xenbus_device *dev, pm_message_t state);
 	int (*resume)(struct xenbus_device *dev);
 	int (*uevent)(struct xenbus_device *, char **, int, char *, int);
 	struct device_driver driver;
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 11/11] xen/xenbus: export xenbus_dev_changed
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (12 preceding siblings ...)
  2009-03-23 17:55           ` [PATCH 10/11] xen: use device model for suspending xenbus devices Jeremy Fitzhardinge
@ 2009-03-23 17:55           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [GIT PULL] xen/dom0: core dom0 support Jeremy Fitzhardinge
                             ` (15 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 17:55 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Xen-devel,
	Jeremy Fitzhardinge, Jeremy Fitzhardinge

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/xenbus/xenbus_probe.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 4649213..d42e25d 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -660,6 +660,7 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
 
 	kfree(root);
 }
+EXPORT_SYMBOL_GPL(xenbus_dev_changed);
 
 static void frontend_changed(struct xenbus_watch *watch,
 			     const char **vec, unsigned int len)
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [GIT PULL] xen/dom0: core dom0 support
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (13 preceding siblings ...)
  2009-03-23 17:55           ` [PATCH 11/11] xen/xenbus: export xenbus_dev_changed Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 01/15] xen dom0: Make hvc_xen console work for dom0 Jeremy Fitzhardinge
                             ` (14 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel

This series adds core dom0 support for Xen, allowing the kernel to boot
as Xen's control domain.  It isn't much use on its own (it doesn't result
in a booting dom0 kernel), but it lays the foundations for later changes.
Native booting is unaffected.

This is largely unchanged from the last posting, other than being rebased
to a newer version of tip/master (or, more precisely, the other Xen topic
branches which themselves have been rebased).

Thanks,
	J

The following changes since commit 627b11ca44b334e201c35bf1a1582dc10efaf4a8:
  Jeremy Fitzhardinge (1):
        xen/mmu: weaken flush_tlb_other test

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git push2/xen/dom0/core

Christophe Saout (1):
      paravirtualize IO permission bitmap

Ian Campbell (4):
      xen: disable PAT
      xen/dom0: Use host E820 map
      xen: implement XENMEM_machphys_mapping
      xen: clear reserved bits in l3 entries given in the initial pagetables

Jeremy Fitzhardinge (6):
      xen dom0: Make hvc_xen console work for dom0.
      xen/dom0: use _PAGE_IOMAP in ioremap to do machine mappings
      xen: allow enable use of VGA console on dom0
      xen/dom0: add XEN_DOM0 config option
      x86: make /dev/mem mappings _PAGE_IOMAP
      x86: don't need "changed" parameter for set_io_bitmap()

Juan Quintela (2):
      xen dom0: Initialize xenbus for dom0.
      xen dom0: Set up basic IO permissions for dom0.

Stephen Tweedie (2):
      xen dom0: Add support for the platform_ops hypercall
      xen mtrr: Add mtrr_ops support for Xen mtrr

 arch/x86/include/asm/paravirt.h         |    9 ++
 arch/x86/include/asm/pat.h              |    5 +
 arch/x86/include/asm/processor.h        |    4 +
 arch/x86/include/asm/xen/hypercall.h    |    8 +
 arch/x86/include/asm/xen/interface.h    |    6 +-
 arch/x86/include/asm/xen/interface_32.h |    5 +
 arch/x86/include/asm/xen/interface_64.h |   13 +--
 arch/x86/include/asm/xen/page.h         |   15 +--
 arch/x86/kernel/cpu/mtrr/Makefile       |    1 +
 arch/x86/kernel/cpu/mtrr/amd.c          |    1 +
 arch/x86/kernel/cpu/mtrr/centaur.c      |    1 +
 arch/x86/kernel/cpu/mtrr/cyrix.c        |    1 +
 arch/x86/kernel/cpu/mtrr/generic.c      |    1 +
 arch/x86/kernel/cpu/mtrr/main.c         |   11 +-
 arch/x86/kernel/cpu/mtrr/mtrr.h         |    5 +
 arch/x86/kernel/cpu/mtrr/xen.c          |   59 ++++++++
 arch/x86/kernel/ioport.c                |   29 +++-
 arch/x86/kernel/paravirt.c              |    1 +
 arch/x86/kernel/process.c               |   27 +---
 arch/x86/mm/pat.c                       |    7 +-
 arch/x86/xen/Kconfig                    |   26 ++++
 arch/x86/xen/Makefile                   |    3 +-
 arch/x86/xen/enlighten.c                |   51 ++++++-
 arch/x86/xen/mmu.c                      |  123 ++++++++++++++++-
 arch/x86/xen/setup.c                    |   51 ++++++-
 arch/x86/xen/vga.c                      |   67 +++++++++
 arch/x86/xen/xen-ops.h                  |   12 ++
 drivers/char/hvc_xen.c                  |  101 +++++++++-----
 drivers/xen/events.c                    |    2 +-
 drivers/xen/xenbus/xenbus_probe.c       |   30 ++++-
 include/xen/events.h                    |    2 +
 include/xen/interface/memory.h          |   42 ++++++
 include/xen/interface/platform.h        |  232 +++++++++++++++++++++++++++++++
 include/xen/interface/xen.h             |   41 ++++++
 34 files changed, 885 insertions(+), 107 deletions(-)
 create mode 100644 arch/x86/kernel/cpu/mtrr/xen.c
 create mode 100644 arch/x86/xen/vga.c
 create mode 100644 include/xen/interface/platform.h


^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH 01/15] xen dom0: Make hvc_xen console work for dom0.
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (14 preceding siblings ...)
  2009-03-23 18:09           ` [GIT PULL] xen/dom0: core dom0 support Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 02/15] xen dom0: Initialize xenbus " Jeremy Fitzhardinge
                             ` (13 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel, Jeremy Fitzhardinge, Jeremy Fitzhardinge,
	Juan Quintela

Impact: Add Xen dom0 console

Use the console hypercalls for dom0 console.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 drivers/char/hvc_xen.c |  101 ++++++++++++++++++++++++++++++++----------------
 drivers/xen/events.c   |    2 +-
 include/xen/events.h   |    2 +
 3 files changed, 70 insertions(+), 35 deletions(-)

diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c
index eba999f..81d9186 100644
--- a/drivers/char/hvc_xen.c
+++ b/drivers/char/hvc_xen.c
@@ -55,7 +55,7 @@ static inline void notify_daemon(void)
 	notify_remote_via_evtchn(xen_start_info->console.domU.evtchn);
 }
 
-static int write_console(uint32_t vtermno, const char *data, int len)
+static int domU_write_console(uint32_t vtermno, const char *data, int len)
 {
 	struct xencons_interface *intf = xencons_interface();
 	XENCONS_RING_IDX cons, prod;
@@ -76,7 +76,7 @@ static int write_console(uint32_t vtermno, const char *data, int len)
 	return sent;
 }
 
-static int read_console(uint32_t vtermno, char *buf, int len)
+static int domU_read_console(uint32_t vtermno, char *buf, int len)
 {
 	struct xencons_interface *intf = xencons_interface();
 	XENCONS_RING_IDX cons, prod;
@@ -97,28 +97,63 @@ static int read_console(uint32_t vtermno, char *buf, int len)
 	return recv;
 }
 
-static struct hv_ops hvc_ops = {
-	.get_chars = read_console,
-	.put_chars = write_console,
+static struct hv_ops domU_hvc_ops = {
+	.get_chars = domU_read_console,
+	.put_chars = domU_write_console,
 	.notifier_add = notifier_add_irq,
 	.notifier_del = notifier_del_irq,
 	.notifier_hangup = notifier_hangup_irq,
 };
 
-static int __init xen_init(void)
+static int dom0_read_console(uint32_t vtermno, char *buf, int len)
+{
+	return HYPERVISOR_console_io(CONSOLEIO_read, len, buf);
+}
+
+/*
+ * Either for a dom0 to write to the system console, or a domU with a
+ * debug version of Xen
+ */
+static int dom0_write_console(uint32_t vtermno, const char *str, int len)
+{
+	int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str);
+	if (rc < 0)
+		return 0;
+
+	return len;
+}
+
+static struct hv_ops dom0_hvc_ops = {
+	.get_chars = dom0_read_console,
+	.put_chars = dom0_write_console,
+	.notifier_add = notifier_add_irq,
+	.notifier_del = notifier_del_irq,
+	.notifier_hangup = notifier_hangup_irq,
+};
+
+static int __init xen_hvc_init(void)
 {
 	struct hvc_struct *hp;
+	struct hv_ops *ops;
 
-	if (!xen_pv_domain() ||
-	    xen_initial_domain() ||
-	    !xen_start_info->console.domU.evtchn)
-		return -ENODEV;
+	if (!xen_pv_domain())
+ 		return -ENODEV;
+
+	if (xen_initial_domain()) {
+		ops = &dom0_hvc_ops;
+		xencons_irq = bind_virq_to_irq(VIRQ_CONSOLE, 0);
+	} else {
+		if (!xen_start_info->console.domU.evtchn)
+			return -ENODEV;
+
+		ops = &domU_hvc_ops;
+		xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn);
+	}
 
-	xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn);
 	if (xencons_irq < 0)
 		xencons_irq = 0; /* NO_IRQ */
 
-	hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256);
+	hp = hvc_alloc(HVC_COOKIE, xencons_irq, ops, 256);
 	if (IS_ERR(hp))
 		return PTR_ERR(hp);
 
@@ -135,7 +170,7 @@ void xen_console_resume(void)
 		rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq);
 }
 
-static void __exit xen_fini(void)
+static void __exit xen_hvc_fini(void)
 {
 	if (hvc)
 		hvc_remove(hvc);
@@ -143,29 +178,24 @@ static void __exit xen_fini(void)
 
 static int xen_cons_init(void)
 {
+	struct hv_ops *ops;
+
 	if (!xen_pv_domain())
 		return 0;
 
-	hvc_instantiate(HVC_COOKIE, 0, &hvc_ops);
+	ops = &domU_hvc_ops;
+	if (xen_initial_domain())
+		ops = &dom0_hvc_ops;
+
+	hvc_instantiate(HVC_COOKIE, 0, ops);
+
 	return 0;
 }
 
-module_init(xen_init);
-module_exit(xen_fini);
+module_init(xen_hvc_init);
+module_exit(xen_hvc_fini);
 console_initcall(xen_cons_init);
 
-static void raw_console_write(const char *str, int len)
-{
-	while(len > 0) {
-		int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str);
-		if (rc <= 0)
-			break;
-
-		str += rc;
-		len -= rc;
-	}
-}
-
 #ifdef CONFIG_EARLY_PRINTK
 static void xenboot_write_console(struct console *console, const char *string,
 				  unsigned len)
@@ -173,19 +203,22 @@ static void xenboot_write_console(struct console *console, const char *string,
 	unsigned int linelen, off = 0;
 	const char *pos;
 
-	raw_console_write(string, len);
+	dom0_write_console(0, string, len);
+
+	if (xen_initial_domain())
+		return;
 
-	write_console(0, "(early) ", 8);
+	domU_write_console(0, "(early) ", 8);
 	while (off < len && NULL != (pos = strchr(string+off, '\n'))) {
 		linelen = pos-string+off;
 		if (off + linelen > len)
 			break;
-		write_console(0, string+off, linelen);
-		write_console(0, "\r\n", 2);
+		domU_write_console(0, string+off, linelen);
+		domU_write_console(0, "\r\n", 2);
 		off += linelen + 1;
 	}
 	if (off < len)
-		write_console(0, string+off, len-off);
+		domU_write_console(0, string+off, len-off);
 }
 
 struct console xenboot_console = {
@@ -197,7 +230,7 @@ struct console xenboot_console = {
 
 void xen_raw_console_write(const char *str)
 {
-	raw_console_write(str, strlen(str));
+	dom0_write_console(0, str, strlen(str));
 }
 
 void xen_raw_printk(const char *fmt, ...)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 30963af..f46e880 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -404,7 +404,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 }
 
 
-static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 {
 	struct evtchn_bind_virq bind_virq;
 	int evtchn, irq;
diff --git a/include/xen/events.h b/include/xen/events.h
index 0d5f1ad..0397ba1 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -12,6 +12,8 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
 			      irq_handler_t handler,
 			      unsigned long irqflags, const char *devname,
 			      void *dev_id);
+int bind_virq_to_irq(unsigned int virq, unsigned int cpu);
+
 int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
 			    irq_handler_t handler,
 			    unsigned long irqflags, const char *devname,
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 02/15] xen dom0: Initialize xenbus for dom0.
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (15 preceding siblings ...)
  2009-03-23 18:09           ` [PATCH 01/15] xen dom0: Make hvc_xen console work for dom0 Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 03/15] xen dom0: Set up basic IO permissions " Jeremy Fitzhardinge
                             ` (12 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel, Juan Quintela, Jeremy Fitzhardinge

From: Juan Quintela <quintela@redhat.com>

Impact: initialize Xenbus

Do initial xenbus/xenstore setup in dom0.  In dom0 we need to actually
allocate the xenstore resources, rather than being given them from
outside.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 drivers/xen/xenbus/xenbus_probe.c |   30 +++++++++++++++++++++++++++++-
 1 files changed, 29 insertions(+), 1 deletions(-)

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 773d1cf..38aaec3 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -821,6 +821,7 @@ void xenbus_probe(struct work_struct *unused)
 static int __init xenbus_probe_init(void)
 {
 	int err = 0;
+	unsigned long page = 0;
 
 	DPRINTK("");
 
@@ -841,7 +842,31 @@ static int __init xenbus_probe_init(void)
 	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
 	 */
 	if (xen_initial_domain()) {
-		/* dom0 not yet supported */
+		struct evtchn_alloc_unbound alloc_unbound;
+
+		/* Allocate Xenstore page */
+		page = get_zeroed_page(GFP_KERNEL);
+		if (!page)
+			return -ENOMEM;
+
+		xen_store_mfn = xen_start_info->store_mfn =
+			pfn_to_mfn(virt_to_phys((void *)page) >>
+				   PAGE_SHIFT);
+
+		/* Next allocate a local port which xenstored can bind to */
+		alloc_unbound.dom        = DOMID_SELF;
+		alloc_unbound.remote_dom = 0;
+
+		err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+						  &alloc_unbound);
+		if (err == -ENOSYS)
+			goto out_unreg_front;
+
+		BUG_ON(err);
+		xen_store_evtchn = xen_start_info->store_evtchn =
+			alloc_unbound.port;
+
+		xen_store_interface = mfn_to_virt(xen_store_mfn);
 	} else {
 		xenstored_ready = 1;
 		xen_store_evtchn = xen_start_info->store_evtchn;
@@ -877,6 +902,9 @@ static int __init xenbus_probe_init(void)
 	bus_unregister(&xenbus_frontend.bus);
 
   out_error:
+	if (page != 0)
+		free_page(page);
+
 	return err;
 }
 
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 03/15] xen dom0: Set up basic IO permissions for dom0.
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (16 preceding siblings ...)
  2009-03-23 18:09           ` [PATCH 02/15] xen dom0: Initialize xenbus " Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 04/15] xen dom0: Add support for the platform_ops hypercall Jeremy Fitzhardinge
                             ` (11 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel, Juan Quintela, Jeremy Fitzhardinge

From: Juan Quintela <quintela@redhat.com>

Impact: set iopl for dom0 kernel; map ISA

Add the direct mapping area for ISA bus access, and enable IO space
access for the guest when running as dom0.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 arch/x86/xen/enlighten.c |    8 ++++++++
 arch/x86/xen/mmu.c       |   24 ++++++++++++++++++++++++
 arch/x86/xen/setup.c     |    6 +++++-
 arch/x86/xen/xen-ops.h   |    1 +
 4 files changed, 38 insertions(+), 1 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index da33e0c..19af31b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -989,6 +989,7 @@ asmlinkage void __init xen_start_kernel(void)
 
 	xen_raw_console_write("mapping kernel into physical memory\n");
 	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
+	xen_ident_map_ISA();
 
 	init_mm.pgd = pgd;
 
@@ -998,6 +999,13 @@ asmlinkage void __init xen_start_kernel(void)
 	if (xen_feature(XENFEAT_supervisor_mode_kernel))
 		pv_info.kernel_rpl = 0;
 
+	if (xen_initial_domain()) {
+		struct physdev_set_iopl set_iopl;
+		set_iopl.iopl = 1;
+		if (HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl) == -1)
+			BUG();
+	}
+
 	/* set the limit of our address space */
 	xen_reserve_top();
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index b4e828c..e80e63c 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1635,6 +1635,7 @@ static void *m2v(phys_addr_t maddr)
 	return __ka(m2p(maddr));
 }
 
+/* Set the page permissions on an identity-mapped pages */
 static void set_page_prot(void *addr, pgprot_t prot)
 {
 	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
@@ -1860,6 +1861,29 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
 #endif
 }
 
+__init void xen_ident_map_ISA(void)
+{
+	unsigned long pa;
+
+	/*
+	 * If we're dom0, then linear map the ISA machine addresses into
+	 * the kernel's address space.
+	 */
+	if (!xen_initial_domain())
+		return;
+
+	xen_raw_printk("Xen: setup ISA identity maps\n");
+
+	for (pa = ISA_START_ADDRESS; pa < ISA_END_ADDRESS; pa += PAGE_SIZE) {
+		pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO);
+
+		if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0))
+			BUG();
+	}
+
+	xen_flush_tlb();
+}
+
 __init void xen_post_allocator_init(void)
 {
 	pv_mmu_ops.set_pte = xen_set_pte;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 15c6c68..3e4cf46 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -51,6 +51,9 @@ char * __init xen_memory_setup(void)
 	 * Even though this is normal, usable memory under Xen, reserve
 	 * ISA memory anyway because too many things think they can poke
 	 * about in there.
+	 *
+	 * In a dom0 kernel, this region is identity mapped with the
+	 * hardware ISA area, so it really is out of bounds.
 	 */
 	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
 			E820_RESERVED);
@@ -188,7 +191,8 @@ void __init xen_arch_setup(void)
 
 	pm_idle = xen_idle;
 
-	paravirt_disable_iospace();
+	if (!xen_initial_domain())
+		paravirt_disable_iospace();
 
 	fiddle_vdso();
 }
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 5c50a10..6ff6203 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -29,6 +29,7 @@ void xen_setup_machphys_mapping(void);
 pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
 void xen_ident_map_ISA(void);
 void xen_reserve_top(void);
+void xen_ident_map_ISA(void);
 
 void xen_post_allocator_init(void);
 
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 04/15] xen dom0: Add support for the platform_ops hypercall
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (17 preceding siblings ...)
  2009-03-23 18:09           ` [PATCH 03/15] xen dom0: Set up basic IO permissions " Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 05/15] xen mtrr: Add mtrr_ops support for Xen mtrr Jeremy Fitzhardinge
                             ` (10 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel, Stephen Tweedie, Jeremy Fitzhardinge

From: Stephen Tweedie <sct@redhat.com>

Impact: add hypercall definitions

Minimal changes to get platform ops (renamed dom0_ops on pv_ops) working
on pv_ops builds.  Pulls in upstream linux-2.6.18-xen.hg's platform.h

Signed-off-by: Stephen Tweedie <sct@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/include/asm/xen/hypercall.h |    8 ++
 include/xen/interface/platform.h     |  232 ++++++++++++++++++++++++++++++++++
 include/xen/interface/xen.h          |    2 +
 3 files changed, 242 insertions(+), 0 deletions(-)
 create mode 100644 include/xen/interface/platform.h

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 9c371e4..6528f3b 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -45,6 +45,7 @@
 #include <xen/interface/xen.h>
 #include <xen/interface/sched.h>
 #include <xen/interface/physdev.h>
+#include <xen/interface/platform.h>
 
 /*
  * The hypercall asms have to meet several constraints:
@@ -282,6 +283,13 @@ HYPERVISOR_set_timer_op(u64 timeout)
 }
 
 static inline int
+HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
+{
+	platform_op->interface_version = XENPF_INTERFACE_VERSION;
+	return _hypercall1(int, dom0_op, platform_op);
+}
+
+static inline int
 HYPERVISOR_set_debugreg(int reg, unsigned long value)
 {
 	return _hypercall2(int, set_debugreg, reg, value);
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
new file mode 100644
index 0000000..da548f3
--- /dev/null
+++ b/include/xen/interface/platform.h
@@ -0,0 +1,232 @@
+/******************************************************************************
+ * platform.h
+ *
+ * Hardware platform operations. Intended for use by domain-0 kernel.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_PLATFORM_H__
+#define __XEN_PUBLIC_PLATFORM_H__
+
+#include "xen.h"
+
+#define XENPF_INTERFACE_VERSION 0x03000001
+
+/*
+ * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC,
+ * 1 January, 1970 if the current system time was <system_time>.
+ */
+#define XENPF_settime             17
+struct xenpf_settime {
+    /* IN variables. */
+    uint32_t secs;
+    uint32_t nsecs;
+    uint64_t system_time;
+};
+typedef struct xenpf_settime xenpf_settime_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime_t);
+
+/*
+ * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type.
+ * On x86, @type is an architecture-defined MTRR memory type.
+ * On success, returns the MTRR that was used (@reg) and a handle that can
+ * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting.
+ * (x86-specific).
+ */
+#define XENPF_add_memtype         31
+struct xenpf_add_memtype {
+    /* IN variables. */
+    unsigned long mfn;
+    uint64_t nr_mfns;
+    uint32_t type;
+    /* OUT variables. */
+    uint32_t handle;
+    uint32_t reg;
+};
+typedef struct xenpf_add_memtype xenpf_add_memtype_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_add_memtype_t);
+
+/*
+ * Tear down an existing memory-range type. If @handle is remembered then it
+ * should be passed in to accurately tear down the correct setting (in case
+ * of overlapping memory regions with differing types). If it is not known
+ * then @handle should be set to zero. In all cases @reg must be set.
+ * (x86-specific).
+ */
+#define XENPF_del_memtype         32
+struct xenpf_del_memtype {
+    /* IN variables. */
+    uint32_t handle;
+    uint32_t reg;
+};
+typedef struct xenpf_del_memtype xenpf_del_memtype_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_del_memtype_t);
+
+/* Read current type of an MTRR (x86-specific). */
+#define XENPF_read_memtype        33
+struct xenpf_read_memtype {
+    /* IN variables. */
+    uint32_t reg;
+    /* OUT variables. */
+    unsigned long mfn;
+    uint64_t nr_mfns;
+    uint32_t type;
+};
+typedef struct xenpf_read_memtype xenpf_read_memtype_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_read_memtype_t);
+
+#define XENPF_microcode_update    35
+struct xenpf_microcode_update {
+    /* IN variables. */
+    GUEST_HANDLE(void) data;          /* Pointer to microcode data */
+    uint32_t length;                  /* Length of microcode data. */
+};
+typedef struct xenpf_microcode_update xenpf_microcode_update_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_microcode_update_t);
+
+#define XENPF_platform_quirk      39
+#define QUIRK_NOIRQBALANCING      1 /* Do not restrict IO-APIC RTE targets */
+#define QUIRK_IOAPIC_BAD_REGSEL   2 /* IO-APIC REGSEL forgets its value    */
+#define QUIRK_IOAPIC_GOOD_REGSEL  3 /* IO-APIC REGSEL behaves properly     */
+struct xenpf_platform_quirk {
+    /* IN variables. */
+    uint32_t quirk_id;
+};
+typedef struct xenpf_platform_quirk xenpf_platform_quirk_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_platform_quirk_t);
+
+#define XENPF_firmware_info       50
+#define XEN_FW_DISK_INFO          1 /* from int 13 AH=08/41/48 */
+#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
+#define XEN_FW_VBEDDC_INFO        3 /* from int 10 AX=4f15 */
+struct xenpf_firmware_info {
+    /* IN variables. */
+    uint32_t type;
+    uint32_t index;
+    /* OUT variables. */
+    union {
+        struct {
+            /* Int13, Fn48: Check Extensions Present. */
+            uint8_t device;                   /* %dl: bios device number */
+            uint8_t version;                  /* %ah: major version      */
+            uint16_t interface_support;       /* %cx: support bitmap     */
+            /* Int13, Fn08: Legacy Get Device Parameters. */
+            uint16_t legacy_max_cylinder;     /* %cl[7:6]:%ch: max cyl # */
+            uint8_t legacy_max_head;          /* %dh: max head #         */
+            uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector #  */
+            /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
+            /* NB. First uint16_t of buffer must be set to buffer size.      */
+            GUEST_HANDLE(void) edd_params;
+        } disk_info; /* XEN_FW_DISK_INFO */
+        struct {
+            uint8_t device;                   /* bios device number  */
+            uint32_t mbr_signature;           /* offset 0x1b8 in mbr */
+        } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */
+        struct {
+            /* Int10, AX=4F15: Get EDID info. */
+            uint8_t capabilities;
+            uint8_t edid_transfer_time;
+            /* must refer to 128-byte buffer */
+            GUEST_HANDLE(uchar) edid;
+        } vbeddc_info; /* XEN_FW_VBEDDC_INFO */
+    } u;
+};
+typedef struct xenpf_firmware_info xenpf_firmware_info_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_firmware_info_t);
+
+#define XENPF_enter_acpi_sleep    51
+struct xenpf_enter_acpi_sleep {
+    /* IN variables */
+    uint16_t pm1a_cnt_val;      /* PM1a control value. */
+    uint16_t pm1b_cnt_val;      /* PM1b control value. */
+    uint32_t sleep_state;       /* Which state to enter (Sn). */
+    uint32_t flags;             /* Must be zero. */
+};
+typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_enter_acpi_sleep_t);
+
+#define XENPF_change_freq         52
+struct xenpf_change_freq {
+    /* IN variables */
+    uint32_t flags; /* Must be zero. */
+    uint32_t cpu;   /* Physical cpu. */
+    uint64_t freq;  /* New frequency (Hz). */
+};
+typedef struct xenpf_change_freq xenpf_change_freq_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_change_freq_t);
+
+/*
+ * Get idle times (nanoseconds since boot) for physical CPUs specified in the
+ * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is
+ * indexed by CPU number; only entries with the corresponding @cpumap_bitmap
+ * bit set are written to. On return, @cpumap_bitmap is modified so that any
+ * non-existent CPUs are cleared. Such CPUs have their @idletime array entry
+ * cleared.
+ */
+#define XENPF_getidletime         53
+struct xenpf_getidletime {
+    /* IN/OUT variables */
+    /* IN: CPUs to interrogate; OUT: subset of IN which are present */
+    GUEST_HANDLE(uchar) cpumap_bitmap;
+    /* IN variables */
+    /* Size of cpumap bitmap. */
+    uint32_t cpumap_nr_cpus;
+    /* Must be indexable for every cpu in cpumap_bitmap. */
+    GUEST_HANDLE(uint64_t) idletime;
+    /* OUT variables */
+    /* System time when the idletime snapshots were taken. */
+    uint64_t now;
+};
+typedef struct xenpf_getidletime xenpf_getidletime_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t);
+
+struct xen_platform_op {
+    uint32_t cmd;
+    uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
+    union {
+        struct xenpf_settime           settime;
+        struct xenpf_add_memtype       add_memtype;
+        struct xenpf_del_memtype       del_memtype;
+        struct xenpf_read_memtype      read_memtype;
+        struct xenpf_microcode_update  microcode;
+        struct xenpf_platform_quirk    platform_quirk;
+        struct xenpf_firmware_info     firmware_info;
+        struct xenpf_enter_acpi_sleep  enter_acpi_sleep;
+        struct xenpf_change_freq       change_freq;
+        struct xenpf_getidletime       getidletime;
+        uint8_t                        pad[128];
+    } u;
+};
+typedef struct xen_platform_op xen_platform_op_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_platform_op_t);
+
+#endif /* __XEN_PUBLIC_PLATFORM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 2befa3e..18b5599 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -461,6 +461,8 @@ typedef uint8_t xen_domain_handle_t[16];
 #define __mk_unsigned_long(x) x ## UL
 #define mk_unsigned_long(x) __mk_unsigned_long(x)
 
+DEFINE_GUEST_HANDLE(uint64_t);
+
 #else /* __ASSEMBLY__ */
 
 /* In assembly code we cannot use C numeric constant suffixes. */
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 05/15] xen mtrr: Add mtrr_ops support for Xen mtrr
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (18 preceding siblings ...)
  2009-03-23 18:09           ` [PATCH 04/15] xen dom0: Add support for the platform_ops hypercall Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 06/15] xen: disable PAT Jeremy Fitzhardinge
                             ` (9 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel, Stephen Tweedie, Jeremy Fitzhardinge

From: Stephen Tweedie <sct@redhat.com>

Impact: add basic MTRR support (enough to get started)

Add a Xen mtrr type, and reorganise mtrr initialisation slightly to
allow the mtrr driver to set up num_var_ranges (Xen needs to do this by
querying the hypervisor itself.)

Only the boot path is handled for now: we set up a xen-specific mtrr_if
and set up the mtrr tables based on hypervisor information, but we don't
yet handle mtrr entry add/delete.

Signed-off-by: Stephen Tweedie <sct@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/kernel/cpu/mtrr/Makefile  |    1 +
 arch/x86/kernel/cpu/mtrr/amd.c     |    1 +
 arch/x86/kernel/cpu/mtrr/centaur.c |    1 +
 arch/x86/kernel/cpu/mtrr/cyrix.c   |    1 +
 arch/x86/kernel/cpu/mtrr/generic.c |    1 +
 arch/x86/kernel/cpu/mtrr/main.c    |   11 +++++--
 arch/x86/kernel/cpu/mtrr/mtrr.h    |    5 +++
 arch/x86/kernel/cpu/mtrr/xen.c     |   59 ++++++++++++++++++++++++++++++++++++
 8 files changed, 77 insertions(+), 3 deletions(-)
 create mode 100644 arch/x86/kernel/cpu/mtrr/xen.c

diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
index f4361b5..404e458 100644
--- a/arch/x86/kernel/cpu/mtrr/Makefile
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,3 +1,4 @@
 obj-y		:= main.o if.o generic.o state.o cleanup.o
 obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
+obj-$(CONFIG_XEN_DOM0) += xen.o
 
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c
index ee2331b..7bf23de 100644
--- a/arch/x86/kernel/cpu/mtrr/amd.c
+++ b/arch/x86/kernel/cpu/mtrr/amd.c
@@ -108,6 +108,7 @@ static struct mtrr_ops amd_mtrr_ops = {
 	.get_free_region   = generic_get_free_region,
 	.validate_add_page = amd_validate_add_page,
 	.have_wrcomb       = positive_have_wrcomb,
+	.num_var_ranges	   = common_num_var_ranges,
 };
 
 int __init amd_init_mtrr(void)
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
index cb9aa3a..7e3f74f 100644
--- a/arch/x86/kernel/cpu/mtrr/centaur.c
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
@@ -213,6 +213,7 @@ static struct mtrr_ops centaur_mtrr_ops = {
 	.get_free_region   = centaur_get_free_region,
 	.validate_add_page = centaur_validate_add_page,
 	.have_wrcomb       = positive_have_wrcomb,
+	.num_var_ranges	   = common_num_var_ranges,
 };
 
 int __init centaur_init_mtrr(void)
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index ff14c32..c7bb5e3 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -263,6 +263,7 @@ static struct mtrr_ops cyrix_mtrr_ops = {
 	.get_free_region   = cyrix_get_free_region,
 	.validate_add_page = generic_validate_add_page,
 	.have_wrcomb       = positive_have_wrcomb,
+	.num_var_ranges	   = common_num_var_ranges,
 };
 
 int __init cyrix_init_mtrr(void)
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 37f28fc..9fd8ebf 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -725,4 +725,5 @@ struct mtrr_ops generic_mtrr_ops = {
 	.set               = generic_set_mtrr,
 	.validate_add_page = generic_validate_add_page,
 	.have_wrcomb       = generic_have_wrcomb,
+	.num_var_ranges	   = common_num_var_ranges,
 };
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 03cda01..fd5ac04 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -99,7 +99,7 @@ static int have_wrcomb(void)
 }
 
 /*  This function returns the number of variable MTRRs  */
-static void __init set_num_var_ranges(void)
+int __init common_num_var_ranges(void)
 {
 	unsigned long config = 0, dummy;
 
@@ -109,7 +109,7 @@ static void __init set_num_var_ranges(void)
 		config = 2;
 	else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
 		config = 8;
-	num_var_ranges = config & 0xff;
+	return config & 0xff;
 }
 
 static void __init init_table(void)
@@ -622,12 +622,17 @@ int __initdata changed_by_mtrr_cleanup;
 void __init mtrr_bp_init(void)
 {
 	u32 phys_addr;
+
 	init_ifs();
 
 	phys_addr = 32;
 
 	if (cpu_has_mtrr) {
 		mtrr_if = &generic_mtrr_ops;
+#ifdef CONFIG_XEN_DOM0
+		xen_init_mtrr();
+#endif
+
 		size_or_mask = 0xff000000;	/* 36 bits */
 		size_and_mask = 0x00f00000;
 		phys_addr = 36;
@@ -685,7 +690,7 @@ void __init mtrr_bp_init(void)
 	}
 
 	if (mtrr_if) {
-		set_num_var_ranges();
+		num_var_ranges = mtrr_if->num_var_ranges();
 		init_table();
 		if (use_intel()) {
 			get_mtrr_state();
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 77f67f7..3502f6c 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -41,6 +41,8 @@ struct mtrr_ops {
 	int	(*validate_add_page)(unsigned long base, unsigned long size,
 				     unsigned int type);
 	int	(*have_wrcomb)(void);
+
+	int	(*num_var_ranges)(void);
 };
 
 extern int generic_get_free_region(unsigned long base, unsigned long size,
@@ -52,6 +54,8 @@ extern struct mtrr_ops generic_mtrr_ops;
 
 extern int positive_have_wrcomb(void);
 
+extern int __init common_num_var_ranges(void);
+
 /* library functions for processor-specific routines */
 struct set_mtrr_context {
 	unsigned long flags;
@@ -89,6 +93,7 @@ void mtrr_wrmsr(unsigned, unsigned, unsigned);
 int amd_init_mtrr(void);
 int cyrix_init_mtrr(void);
 int centaur_init_mtrr(void);
+void xen_init_mtrr(void);
 
 extern int changed_by_mtrr_cleanup;
 extern int mtrr_cleanup(unsigned address_bits);
diff --git a/arch/x86/kernel/cpu/mtrr/xen.c b/arch/x86/kernel/cpu/mtrr/xen.c
new file mode 100644
index 0000000..db3ef39
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/xen.c
@@ -0,0 +1,59 @@
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+#include <linux/mutex.h>
+
+#include <asm/mtrr.h>
+#include "mtrr.h"
+
+#include <xen/interface/platform.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+static int __init xen_num_var_ranges(void);
+
+/* DOM0 TODO: Need to fill in the remaining mtrr methods to have full
+ * working userland mtrr support. */
+static struct mtrr_ops xen_mtrr_ops = {
+	.vendor            = X86_VENDOR_UNKNOWN,
+//	.set               = xen_set_mtrr,
+//	.get               = xen_get_mtrr,
+	.get_free_region   = generic_get_free_region,
+//	.validate_add_page = xen_validate_add_page,
+	.have_wrcomb       = positive_have_wrcomb,
+	.use_intel_if	   = 0,
+	.num_var_ranges	   = xen_num_var_ranges,
+};
+
+static int __init xen_num_var_ranges(void)
+{
+	int ranges;
+	struct xen_platform_op op;
+
+	for (ranges = 0; ; ranges++) {
+		op.cmd = XENPF_read_memtype;
+		op.u.read_memtype.reg = ranges;
+		if (HYPERVISOR_dom0_op(&op) != 0)
+			break;
+	}
+	return ranges;
+}
+
+void __init xen_init_mtrr(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	if (!xen_initial_domain())
+		return;
+
+	if ((!cpu_has(c, X86_FEATURE_MTRR)) &&
+	    (!cpu_has(c, X86_FEATURE_K6_MTRR)) &&
+	    (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) &&
+	    (!cpu_has(c, X86_FEATURE_CENTAUR_MCR)))
+		return;
+
+	mtrr_if = &xen_mtrr_ops;
+}
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 06/15] xen: disable PAT
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (19 preceding siblings ...)
  2009-03-23 18:09           ` [PATCH 05/15] xen mtrr: Add mtrr_ops support for Xen mtrr Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 07/15] xen/dom0: use _PAGE_IOMAP in ioremap to do machine mappings Jeremy Fitzhardinge
                             ` (8 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel, Ian Campbell

From: Ian Campbell <ian.campbell@citrix.com>

Impact: disable PAT under Xen

Xen imposes a particular PAT layout on all paravirtual guests which
does not match the layout Linux would like to use.

Force PAT to be disabled until this is resolved.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
 arch/x86/include/asm/pat.h |    5 +++++
 arch/x86/mm/pat.c          |    5 -----
 arch/x86/xen/enlighten.c   |    3 +++
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index 2cd07b9..d434a4e 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -6,8 +6,13 @@
 
 #ifdef CONFIG_X86_PAT
 extern int pat_enabled;
+extern void pat_disable(const char *reason);
 #else
 static const int pat_enabled;
+static inline void pat_disable(const char *reason)
+{
+	(void)reason;
+}
 #endif
 
 extern void pat_init(void);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 640339e..4d2a4df 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -43,11 +43,6 @@ static int __init nopat(char *str)
 	return 0;
 }
 early_param("nopat", nopat);
-#else
-static inline void pat_disable(const char *reason)
-{
-	(void)reason;
-}
 #endif
 
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 19af31b..731c096 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -48,6 +48,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/reboot.h>
+#include <asm/pat.h>
 
 #include "xen-ops.h"
 #include "mmu.h"
@@ -1029,6 +1030,8 @@ asmlinkage void __init xen_start_kernel(void)
 		add_preferred_console("hvc", 0, NULL);
 	}
 
+	pat_disable("PAT disabled on Xen");
+
 	xen_raw_console_write("about to get started...\n");
 
 	/* Start the world */
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 07/15] xen/dom0: use _PAGE_IOMAP in ioremap to do machine mappings
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (20 preceding siblings ...)
  2009-03-23 18:09           ` [PATCH 06/15] xen: disable PAT Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 08/15] xen/dom0: Use host E820 map Jeremy Fitzhardinge
                             ` (7 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel, Jeremy Fitzhardinge, Jeremy Fitzhardinge

Impact: allow Xen domain to map real hardware

In a Xen domain, ioremap operates on machine addresses, not
pseudo-physical addresses.  We use _PAGE_IOMAP to determine whether a
mapping is intended for machine addresses.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/include/asm/xen/page.h |    8 +---
 arch/x86/xen/enlighten.c        |    4 ++-
 arch/x86/xen/mmu.c              |   69 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 72 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 018a0a4..bf5f7d3 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -112,13 +112,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
  */
 static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
 {
-	extern unsigned long max_mapnr;
 	unsigned long pfn = mfn_to_pfn(mfn);
-	if ((pfn < max_mapnr)
-	    && !xen_feature(XENFEAT_auto_translated_physmap)
-	    && (get_phys_to_machine(pfn) != mfn))
-		return max_mapnr; /* force !pfn_valid() */
-	/* XXX fixme; not true with sparsemem */
+	if (get_phys_to_machine(pfn) != mfn)
+		return -1; /* force !pfn_valid() */
 	return pfn;
 }
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 731c096..35deb3e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -978,7 +978,9 @@ asmlinkage void __init xen_start_kernel(void)
 
 	/* Prevent unwanted bits from being set in PTEs. */
 	__supported_pte_mask &= ~_PAGE_GLOBAL;
-	if (!xen_initial_domain())
+	if (xen_initial_domain())
+		__supported_pte_mask |= _PAGE_IOMAP;
+	else
 		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
 
 	/* Don't do the full vcpu_info placement stuff until we have a
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index e80e63c..545c00c 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -395,6 +395,28 @@ static bool xen_page_pinned(void *ptr)
 	return PagePinned(page);
 }
 
+static bool xen_iomap_pte(pte_t pte)
+{
+	return xen_initial_domain() && (pte_flags(pte) & _PAGE_IOMAP);
+}
+
+static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
+{
+	struct multicall_space mcs;
+	struct mmu_update *u;
+
+	mcs = xen_mc_entry(sizeof(*u));
+	u = mcs.args;
+
+	/* ptep might be kmapped when using 32-bit HIGHPTE */
+	u->ptr = arbitrary_virt_to_machine(ptep).maddr;
+	u->val = pte_val_ma(pteval);
+
+	MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_IO);
+
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
+}
+
 static void xen_extend_mmu_update(const struct mmu_update *update)
 {
 	struct multicall_space mcs;
@@ -471,6 +493,11 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval)
 {
+	if (xen_iomap_pte(pteval)) {
+		xen_set_iomap_pte(ptep, pteval);
+		goto out;
+	}
+
 	ADD_STATS(set_pte_at, 1);
 //	ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
 	ADD_STATS(set_pte_at_current, mm == current->mm);
@@ -541,8 +568,25 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
 	return val;
 }
 
+static pteval_t iomap_pte(pteval_t val)
+{
+	if (val & _PAGE_PRESENT) {
+		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
+		pteval_t flags = val & PTE_FLAGS_MASK;
+
+		/* We assume the pte frame number is a MFN, so
+		   just use it as-is. */
+		val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
+	}
+
+	return val;
+}
+
 pteval_t xen_pte_val(pte_t pte)
 {
+	if (xen_initial_domain() && (pte.pte & _PAGE_IOMAP))
+		return pte.pte;
+
 	return pte_mfn_to_pfn(pte.pte);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
@@ -555,7 +599,11 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
 
 pte_t xen_make_pte(pteval_t pte)
 {
-	pte = pte_pfn_to_mfn(pte);
+	if (unlikely(xen_initial_domain() && (pte & _PAGE_IOMAP)))
+		pte = iomap_pte(pte);
+	else
+		pte = pte_pfn_to_mfn(pte);
+
 	return native_make_pte(pte);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
@@ -611,6 +659,11 @@ void xen_set_pud(pud_t *ptr, pud_t val)
 
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
+	if (xen_iomap_pte(pte)) {
+		xen_set_iomap_pte(ptep, pte);
+		return;
+	}
+
 	ADD_STATS(pte_update, 1);
 //	ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
 	ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
@@ -627,6 +680,11 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
 #ifdef CONFIG_X86_PAE
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
+	if (xen_iomap_pte(pte)) {
+		xen_set_iomap_pte(ptep, pte);
+		return;
+	}
+
 	set_64bit((u64 *)ptep, native_pte_val(pte));
 }
 
@@ -1844,9 +1902,16 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
 		pte = pfn_pte(phys, prot);
 		break;
 
-	default:
+	case FIX_PARAVIRT_BOOTMAP:
+		/* This is an MFN, but it isn't an IO mapping from the
+		   IO domain */
 		pte = mfn_pte(phys, prot);
 		break;
+
+	default:
+		/* By default, set_fixmap is used for hardware mappings */
+		pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
+		break;
 	}
 
 	__native_set_fixmap(idx, pte);
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 08/15] xen/dom0: Use host E820 map
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (21 preceding siblings ...)
  2009-03-23 18:09           ` [PATCH 07/15] xen/dom0: use _PAGE_IOMAP in ioremap to do machine mappings Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 09/15] xen: implement XENMEM_machphys_mapping Jeremy Fitzhardinge
                             ` (6 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel, Ian Campbell, Jeremy Fitzhardinge

From: Ian Campbell <ian.campbell@citrix.com>

Impact: synthesize virtual E820 using real one as template

Unlike the non-paravirt Xen port we do not have distinct psuedo-physical
and I/O memory resource-spaces and therefore resources in the two
can clash. Fix this by registering a memory map which matches the
underlying I/O map. Currently this wastes the memory in the reserved
regions. Eventually we should remap this memory to the end of the
address space.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/setup.c           |   45 +++++++++++++++++++++++++++++++++++++--
 include/xen/interface/memory.h |   29 +++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 3e4cf46..175396c 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -19,6 +19,7 @@
 
 #include <xen/page.h>
 #include <xen/interface/callback.h>
+#include <xen/interface/memory.h>
 #include <xen/interface/physdev.h>
 #include <xen/features.h>
 
@@ -36,16 +37,54 @@ extern void xen_syscall32_target(void);
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
  **/
-
 char * __init xen_memory_setup(void)
 {
 	unsigned long max_pfn = xen_start_info->nr_pages;
+	unsigned long long mem_end;
+	int rc;
+	struct xen_memory_map memmap;
+	/*
+	 * This is rather large for a stack variable but this early in
+	 * the boot process we know we have plenty slack space.
+	 */
+	struct e820entry map[E820MAX];
+	int op = xen_initial_domain() ?
+		XENMEM_machine_memory_map :
+		XENMEM_memory_map;
+	int i;
 
 	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
+	mem_end = PFN_PHYS((u64)max_pfn);
+
+	memmap.nr_entries = E820MAX;
+	set_xen_guest_handle(memmap.buffer, map);
+
+	rc = HYPERVISOR_memory_op(op, &memmap);
+	if (rc == -ENOSYS) {
+		memmap.nr_entries = 1;
+		map[0].addr = 0ULL;
+		map[0].size = mem_end;
+		/* 8MB slack (to balance backend allocations). */
+		map[0].size += 8ULL << 20;
+		map[0].type = E820_RAM;
+		rc = 0;
+	}
+	BUG_ON(rc);
 
 	e820.nr_map = 0;
-
-	e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM);
+	for (i = 0; i < memmap.nr_entries; i++) {
+		unsigned long long end = map[i].addr + map[i].size;
+		if (map[i].type == E820_RAM) {
+			if (map[i].addr > mem_end)
+				continue;
+			if (end > mem_end) {
+				/* Truncate region to max_mem. */
+				map[i].size -= end - mem_end;
+			}
+		}
+		if (map[i].size > 0)
+			e820_add_region(map[i].addr, map[i].size, map[i].type);
+	}
 
 	/*
 	 * Even though this is normal, usable memory under Xen, reserve
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index af36ead..e6c6bcb 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -142,4 +142,33 @@ struct xen_translate_gpfn_list {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
 
+/*
+ * Returns the pseudo-physical memory map as it was when the domain
+ * was started (specified by XENMEM_set_memory_map).
+ * arg == addr of struct xen_memory_map.
+ */
+#define XENMEM_memory_map           9
+struct xen_memory_map {
+    /*
+     * On call the number of entries which can be stored in buffer. On
+     * return the number of entries which have been stored in
+     * buffer.
+     */
+    unsigned int nr_entries;
+
+    /*
+     * Entries in the buffer are in the same format as returned by the
+     * BIOS INT 0x15 EAX=0xE820 call.
+     */
+    GUEST_HANDLE(void) buffer;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
+
+/*
+ * Returns the real physical memory map. Passes the same structure as
+ * XENMEM_memory_map.
+ * arg == addr of struct xen_memory_map.
+ */
+#define XENMEM_machine_memory_map   10
+
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 09/15] xen: implement XENMEM_machphys_mapping
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (22 preceding siblings ...)
  2009-03-23 18:09           ` [PATCH 08/15] xen/dom0: Use host E820 map Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 10/15] xen: clear reserved bits in l3 entries given in the initial pagetables Jeremy Fitzhardinge
                             ` (5 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel, Ian Campbell, Jeremy Fitzhardinge

From: Ian Campbell <ian.campbell@citrix.com>

Impact: add hypercall definitions

This hypercall allows Xen to specify a non-default location for the
machine to physical mapping. This capability is used when running a 32
bit domain 0 on a 64 bit hypervisor to shrink the hypervisor hole to
exactly the size required.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/include/asm/xen/interface.h    |    6 +++---
 arch/x86/include/asm/xen/interface_32.h |    5 +++++
 arch/x86/include/asm/xen/interface_64.h |   13 +------------
 arch/x86/include/asm/xen/page.h         |    7 ++++---
 arch/x86/xen/enlighten.c                |    8 +++++++-
 arch/x86/xen/mmu.c                      |   15 +++++++++++++++
 include/xen/interface/memory.h          |   13 +++++++++++++
 7 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index e8506c1..1c10c88 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void);
 #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
 #endif
 
-#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
-#endif
+#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT)
 
 /* Maximum number of virtual CPUs in multi-processor guests. */
 #define MAX_VIRT_CPUS 32
diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h
index 42a7e00..8413688 100644
--- a/arch/x86/include/asm/xen/interface_32.h
+++ b/arch/x86/include/asm/xen/interface_32.h
@@ -32,6 +32,11 @@
 /* And the trap vector is... */
 #define TRAP_INSTR "int $0x82"
 
+#define __MACH2PHYS_VIRT_START 0xF5800000
+#define __MACH2PHYS_VIRT_END   0xF6800000
+
+#define __MACH2PHYS_SHIFT      2
+
 /*
  * Virtual addresses beyond this are not modifiable by guest OSes. The
  * machine->physical mapping table starts at this address, read-only.
diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h
index 100d266..839a481 100644
--- a/arch/x86/include/asm/xen/interface_64.h
+++ b/arch/x86/include/asm/xen/interface_64.h
@@ -39,18 +39,7 @@
 #define __HYPERVISOR_VIRT_END   0xFFFF880000000000
 #define __MACH2PHYS_VIRT_START  0xFFFF800000000000
 #define __MACH2PHYS_VIRT_END    0xFFFF804000000000
-
-#ifndef HYPERVISOR_VIRT_START
-#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
-#define HYPERVISOR_VIRT_END   mk_unsigned_long(__HYPERVISOR_VIRT_END)
-#endif
-
-#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
-#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
-#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
-#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
-#endif
+#define __MACH2PHYS_SHIFT       3
 
 /*
  * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index bf5f7d3..d5fb2e8 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -5,6 +5,7 @@
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/pfn.h>
+#include <linux/mm.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -35,6 +36,8 @@ typedef struct xpaddr {
 #define MAX_DOMAIN_PAGES						\
     ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
 
+extern unsigned long *machine_to_phys_mapping;
+extern unsigned int   machine_to_phys_order;
 
 extern unsigned long get_phys_to_machine(unsigned long pfn);
 extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn);
@@ -62,10 +65,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return mfn;
 
-#if 0
 	if (unlikely((mfn >> machine_to_phys_order) != 0))
-		return max_mapnr;
-#endif
+		return ~0;
 
 	pfn = 0;
 	/*
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 35deb3e..f7c8afb 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -62,6 +62,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 enum xen_domain_type xen_domain_type = XEN_NATIVE;
 EXPORT_SYMBOL_GPL(xen_domain_type);
 
+unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
+EXPORT_SYMBOL(machine_to_phys_mapping);
+unsigned int   machine_to_phys_order;
+EXPORT_SYMBOL(machine_to_phys_order);
+
 struct start_info *xen_start_info;
 EXPORT_SYMBOL_GPL(xen_start_info);
 
@@ -913,7 +918,6 @@ static const struct machine_ops __initdata xen_machine_ops = {
 	.emergency_restart = xen_emergency_restart,
 };
 
-
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
 {
@@ -928,6 +932,8 @@ asmlinkage void __init xen_start_kernel(void)
 
 	xen_setup_features();
 
+	xen_setup_machphys_mapping();
+
 	/* Install Xen paravirt ops */
 	pv_info = xen_info;
 	pv_init_ops = xen_init_ops;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 545c00c..2834852 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -57,6 +57,7 @@
 #include <xen/page.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/version.h>
+#include <xen/interface/memory.h>
 #include <xen/hvc-console.h>
 
 #include "multicalls.h"
@@ -1749,6 +1750,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 	set_page_prot(pmd, PAGE_KERNEL_RO);
 }
 
+void __init xen_setup_machphys_mapping(void)
+{
+	struct xen_machphys_mapping mapping;
+	unsigned long machine_to_phys_nr_ents;
+
+	if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
+		machine_to_phys_mapping = (unsigned long *)mapping.v_start;
+		machine_to_phys_nr_ents = mapping.max_mfn + 1;
+	} else {
+		machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+	}
+	machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
+}
+
 #ifdef CONFIG_X86_64
 static void convert_pfn_mfn(void *v)
 {
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index e6c6bcb..f548f7c 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -97,6 +97,19 @@ struct xen_machphys_mfn_list {
 DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
 
 /*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping     12
+struct xen_machphys_mapping {
+    unsigned long v_start, v_end; /* Start and end virtual addresses.   */
+    unsigned long max_mfn;        /* Maximum MFN that can be looked up. */
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t);
+
+/*
  * Sets the GPFN at which a particular page appears in the specified guest's
  * pseudophysical address space.
  * arg == addr of xen_add_to_physmap_t.
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 10/15] xen: clear reserved bits in l3 entries given in the initial pagetables
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (23 preceding siblings ...)
  2009-03-23 18:09           ` [PATCH 09/15] xen: implement XENMEM_machphys_mapping Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 11/15] xen: allow enable use of VGA console on dom0 Jeremy Fitzhardinge
                             ` (4 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel, Ian Campbell, Jeremy Fitzhardinge

From: Ian Campbell <ian.campbell@citrix.com>

Impact: workaround Xen bug in 32-on-64 dom0

In native PAE, the only flag that may be legitimately set in an L3
entry is Present.  When Xen grafts the top-level PAE L3 pagetable
entries into the L4 pagetable, it must also set the other permissions
flags so that the mapped pages are actually accessible.

However, due to a bug in the hypervisor, it validates update to the L3
entries as formal PAE entries, so it will refuse to validate these
entries with the extra bits requires for 4-level pagetables.

This patch simply masks the entries back to the bare PAE level,
leaving Xen to add whatever bits it feels are necessary.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/mmu.c |   15 +++++++++++++++
 1 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2834852..463a819 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1855,6 +1855,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
 					 unsigned long max_pfn)
 {
 	pmd_t *kernel_pmd;
+	int i;
 
 	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
 				  xen_start_info->nr_pt_frames * PAGE_SIZE +
@@ -1866,6 +1867,20 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
 	xen_map_identity_early(level2_kernel_pgt, max_pfn);
 
 	memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+
+	/*
+	 * When running a 32 bit domain 0 on a 64 bit hypervisor a
+	 * pinned L3 (such as the initial pgd here) contains bits
+	 * which are reserved in the PAE layout but not in the 64 bit
+	 * layout. Unfortunately some versions of the hypervisor
+	 * (incorrectly) validate compat mode guests against the PAE
+	 * layout and hence will not allow such a pagetable to be
+	 * pinned by the guest. Therefore we mask off only the PFN and
+	 * Present bits of the supplied L3.
+	 */
+	for (i = 0; i < PTRS_PER_PGD; i++)
+		swapper_pg_dir[i].pgd &= (PTE_PFN_MASK | _PAGE_PRESENT);
+
 	set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
 			__pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
 
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 11/15] xen: allow enable use of VGA console on dom0
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (24 preceding siblings ...)
  2009-03-23 18:09           ` [PATCH 10/15] xen: clear reserved bits in l3 entries given in the initial pagetables Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 12/15] xen/dom0: add XEN_DOM0 config option Jeremy Fitzhardinge
                             ` (3 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel, Jeremy Fitzhardinge, Jeremy Fitzhardinge

Impact: make VGA console work in dom0

Get the information about the VGA console hardware from Xen, and put
it into the form the bootloader normally generates, so that the rest
of the kernel can deal with VGA as usual.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/Makefile       |    3 +-
 arch/x86/xen/enlighten.c    |   10 ++++++
 arch/x86/xen/vga.c          |   67 +++++++++++++++++++++++++++++++++++++++++++
 arch/x86/xen/xen-ops.h      |   11 +++++++
 include/xen/interface/xen.h |   39 +++++++++++++++++++++++++
 5 files changed, 129 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/xen/vga.c

diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3b767d0..c4cda96 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -10,4 +10,5 @@ obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 			grant-table.o suspend.o
 
 obj-$(CONFIG_SMP)		+= smp.o spinlock.o
-obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
\ No newline at end of file
+obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
+obj-$(CONFIG_XEN_DOM0)		+= vga.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f7c8afb..40a930b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1036,6 +1036,16 @@ asmlinkage void __init xen_start_kernel(void)
 		add_preferred_console("xenboot", 0, NULL);
 		add_preferred_console("tty", 0, NULL);
 		add_preferred_console("hvc", 0, NULL);
+
+		boot_params.screen_info.orig_video_isVGA = 0;
+	} else {
+		const struct dom0_vga_console_info *info =
+			(void *)((char *)xen_start_info +
+			         xen_start_info->console.dom0.info_off);
+
+		xen_init_vga(info, xen_start_info->console.dom0.info_size);
+		xen_start_info->console.domU.mfn = 0;
+		xen_start_info->console.domU.evtchn = 0;
 	}
 
 	pat_disable("PAT disabled on Xen");
diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
new file mode 100644
index 0000000..f2fca2c
--- /dev/null
+++ b/arch/x86/xen/vga.c
@@ -0,0 +1,67 @@
+#include <linux/screen_info.h>
+#include <linux/init.h>
+
+#include <asm/bootparam.h>
+#include <asm/setup.h>
+
+#include <xen/interface/xen.h>
+
+#include "xen-ops.h"
+
+void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
+{
+	struct screen_info *screen_info = &boot_params.screen_info;
+
+	/* This is drawn from a dump from vgacon:startup in
+	 * standard Linux. */
+	screen_info->orig_video_mode = 3;
+	screen_info->orig_video_isVGA = 1;
+	screen_info->orig_video_lines = 25;
+	screen_info->orig_video_cols = 80;
+	screen_info->orig_video_ega_bx = 3;
+	screen_info->orig_video_points = 16;
+	screen_info->orig_y = screen_info->orig_video_lines - 1;
+
+	switch (info->video_type) {
+	case XEN_VGATYPE_TEXT_MODE_3:
+		if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3)
+		           + sizeof(info->u.text_mode_3))
+			break;
+		screen_info->orig_video_lines = info->u.text_mode_3.rows;
+		screen_info->orig_video_cols = info->u.text_mode_3.columns;
+		screen_info->orig_x = info->u.text_mode_3.cursor_x;
+		screen_info->orig_y = info->u.text_mode_3.cursor_y;
+		screen_info->orig_video_points =
+			info->u.text_mode_3.font_height;
+		break;
+
+	case XEN_VGATYPE_VESA_LFB:
+		if (size < offsetof(struct dom0_vga_console_info,
+		                    u.vesa_lfb.gbl_caps))
+			break;
+		screen_info->orig_video_isVGA = VIDEO_TYPE_VLFB;
+		screen_info->lfb_width = info->u.vesa_lfb.width;
+		screen_info->lfb_height = info->u.vesa_lfb.height;
+		screen_info->lfb_depth = info->u.vesa_lfb.bits_per_pixel;
+		screen_info->lfb_base = info->u.vesa_lfb.lfb_base;
+		screen_info->lfb_size = info->u.vesa_lfb.lfb_size;
+		screen_info->lfb_linelength = info->u.vesa_lfb.bytes_per_line;
+		screen_info->red_size = info->u.vesa_lfb.red_size;
+		screen_info->red_pos = info->u.vesa_lfb.red_pos;
+		screen_info->green_size = info->u.vesa_lfb.green_size;
+		screen_info->green_pos = info->u.vesa_lfb.green_pos;
+		screen_info->blue_size = info->u.vesa_lfb.blue_size;
+		screen_info->blue_pos = info->u.vesa_lfb.blue_pos;
+		screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size;
+		screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos;
+		if (size >= offsetof(struct dom0_vga_console_info,
+		                     u.vesa_lfb.gbl_caps)
+		            + sizeof(info->u.vesa_lfb.gbl_caps))
+			screen_info->capabilities = info->u.vesa_lfb.gbl_caps;
+		if (size >= offsetof(struct dom0_vga_console_info,
+		                     u.vesa_lfb.mode_attrs)
+		            + sizeof(info->u.vesa_lfb.mode_attrs))
+			screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs;
+		break;
+	}
+}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 6ff6203..40abcef 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -72,6 +72,17 @@ static inline void xen_smp_init(void) {}
 #endif
 
 
+struct dom0_vga_console_info;
+
+#ifdef CONFIG_XEN_DOM0
+void xen_init_vga(const struct dom0_vga_console_info *, size_t size);
+#else
+static inline void xen_init_vga(const struct dom0_vga_console_info *info,
+				size_t size)
+{
+}
+#endif
+
 /* Declare an asm function, along with symbols needed to make it
    inlineable */
 #define DECL_ASM(ret, name, ...)		\
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 18b5599..6c0af21 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -449,6 +449,45 @@ struct start_info {
 	int8_t cmd_line[MAX_GUEST_CMDLINE];
 };
 
+struct dom0_vga_console_info {
+    uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */
+#define XEN_VGATYPE_TEXT_MODE_3 0x03
+#define XEN_VGATYPE_VESA_LFB    0x23
+
+    union {
+        struct {
+            /* Font height, in pixels. */
+            uint16_t font_height;
+            /* Cursor location (column, row). */
+            uint16_t cursor_x, cursor_y;
+            /* Number of rows and columns (dimensions in characters). */
+            uint16_t rows, columns;
+        } text_mode_3;
+
+        struct {
+            /* Width and height, in pixels. */
+            uint16_t width, height;
+            /* Bytes per scan line. */
+            uint16_t bytes_per_line;
+            /* Bits per pixel. */
+            uint16_t bits_per_pixel;
+            /* LFB physical address, and size (in units of 64kB). */
+            uint32_t lfb_base;
+            uint32_t lfb_size;
+            /* RGB mask offsets and sizes, as defined by VBE 1.2+ */
+            uint8_t  red_pos, red_size;
+            uint8_t  green_pos, green_size;
+            uint8_t  blue_pos, blue_size;
+            uint8_t  rsvd_pos, rsvd_size;
+
+            /* VESA capabilities (offset 0xa, VESA command 0x4f00). */
+            uint32_t gbl_caps;
+            /* Mode attributes (offset 0x0, VESA command 0x4f01). */
+            uint16_t mode_attrs;
+        } vesa_lfb;
+    } u;
+};
+
 /* These flags are passed in the 'flags' field of start_info_t. */
 #define SIF_PRIVILEGED    (1<<0)  /* Is the domain privileged? */
 #define SIF_INITDOMAIN    (1<<1)  /* Is this the initial control domain? */
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 12/15] xen/dom0: add XEN_DOM0 config option
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (25 preceding siblings ...)
  2009-03-23 18:09           ` [PATCH 11/15] xen: allow enable use of VGA console on dom0 Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 13/15] x86: make /dev/mem mappings _PAGE_IOMAP Jeremy Fitzhardinge
                             ` (2 subsequent siblings)
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel, Jeremy Fitzhardinge, Jeremy Fitzhardinge

Impact: add Kconfig to enable XEN_DOM0

Allow dom0 to be configured.  Requires more patches to do something useful.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/Kconfig     |   26 ++++++++++++++++++++++++++
 arch/x86/xen/enlighten.c |    5 +++--
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index b83e119..fe69286 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -36,3 +36,29 @@ config XEN_DEBUG_FS
 	help
 	  Enable statistics output and various tuning options in debugfs.
 	  Enabling this option may incur a significant performance overhead.
+
+config XEN_DOM0
+	bool "Enable Xen privileged domain support"
+	depends on XEN && X86_IO_APIC && ACPI
+	help
+	  The Xen hypervisor requires a privileged domain ("dom0") to
+	  actually manage the machine, provide devices drivers, etc.
+	  This option enables dom0 support.  A dom0 kernel can also
+	  run as an unprivileged domU kernel, or a kernel running
+	  native on bare hardware.
+
+# Dummy symbol since people have come to rely on the PRIVILEGED_GUEST
+# name in tools.
+config XEN_PRIVILEGED_GUEST
+	def_bool XEN_DOM0
+
+config XEN_PCI_PASSTHROUGH
+       bool #"Enable support for Xen PCI passthrough devices"
+       depends on XEN && PCI
+       help
+         Enable support for passing PCI devices through to
+	 unprivileged domains. (COMPLETELY UNTESTED)
+
+config XEN_DOM0_PCI
+       def_bool y
+       depends on XEN_DOM0 && PCI
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 40a930b..213e1ba 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -169,9 +169,10 @@ static void __init xen_banner(void)
 
 	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
 	       pv_info.name);
-	printk(KERN_INFO "Xen version: %d.%d%s%s\n",
+	printk(KERN_INFO "Xen version: %d.%d%s%s%s\n",
 	       version >> 16, version & 0xffff, extra.extraversion,
-	       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
+	       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "",
+	       xen_initial_domain() ? " (dom0)" : "");
 }
 
 static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 13/15] x86: make /dev/mem mappings _PAGE_IOMAP
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (26 preceding siblings ...)
  2009-03-23 18:09           ` [PATCH 12/15] xen/dom0: add XEN_DOM0 config option Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 14/15] paravirtualize IO permission bitmap Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 15/15] x86: don't need "changed" parameter for set_io_bitmap() Jeremy Fitzhardinge
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel, Jeremy Fitzhardinge

From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

Impact: allow hardware to be mapped via /dev/mem

Use _PAGE_IOMAP on /dev/mem mappings.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/mm/pat.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 4d2a4df..56899ae 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -504,7 +504,7 @@ int free_memtype(u64 start, u64 end)
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 				unsigned long size, pgprot_t vma_prot)
 {
-	return vma_prot;
+	return __pgprot(pgprot_val(vma_prot) | _PAGE_IOMAP);
 }
 
 #ifdef CONFIG_STRICT_DEVMEM
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 14/15] paravirtualize IO permission bitmap
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (27 preceding siblings ...)
  2009-03-23 18:09           ` [PATCH 13/15] x86: make /dev/mem mappings _PAGE_IOMAP Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  2009-03-23 18:09           ` [PATCH 15/15] x86: don't need "changed" parameter for set_io_bitmap() Jeremy Fitzhardinge
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel, Christophe Saout, Jeremy Fitzhardinge

From: Christophe Saout <chtephan@leto.intern.saout.de>

Impact: make ioperm bitmap work under Xen

A PV Xen guest kernel has no TSS of its own, so the IO permission
bitmap must be paravirtualized.  This patch adds set_io_bitmap
as a paravirt op, and defines a native version which updates the tss,
and a Xen version which uses a hypercall.

This is much easier now that 32 and 64-bit use the same code to
manage the IO bitmap.

Signed-off-by: Christophe Saout <chtephan@leto.intern.saout.de>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/include/asm/paravirt.h  |    9 +++++++++
 arch/x86/include/asm/processor.h |    4 ++++
 arch/x86/kernel/ioport.c         |   34 ++++++++++++++++++++++++++--------
 arch/x86/kernel/paravirt.c       |    1 +
 arch/x86/kernel/process.c        |   27 +++++++--------------------
 arch/x86/xen/enlighten.c         |   16 ++++++++++++++++
 6 files changed, 63 insertions(+), 28 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index bc384be..fd2c31b 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -156,6 +156,8 @@ struct pv_cpu_ops {
 	void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
 
 	void (*set_iopl_mask)(unsigned mask);
+	void (*set_io_bitmap)(struct thread_struct *thread,
+	                      int changed, unsigned long bytes_updated);
 
 	void (*wbinvd)(void);
 	void (*io_delay)(void);
@@ -996,11 +998,18 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
 {
 	PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g);
 }
+
 static inline void set_iopl_mask(unsigned mask)
 {
 	PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
 }
 
+static inline void set_io_bitmap(struct thread_struct *thread,
+                                 int changed, unsigned long bytes_updated)
+{
+	PVOP_VCALL3(pv_cpu_ops.set_io_bitmap, thread, changed, bytes_updated);
+}
+
 /* The paravirtualized I/O functions */
 static inline void slow_down_io(void)
 {
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 34c5237..0ed4cba 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -538,6 +538,9 @@ static inline void native_set_iopl_mask(unsigned mask)
 #endif
 }
 
+extern void native_set_io_bitmap(struct thread_struct *thread,
+                                 int changed, unsigned long updated_bytes);
+
 static inline void
 native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
 {
@@ -579,6 +582,7 @@ static inline void load_sp0(struct tss_struct *tss,
 }
 
 #define set_iopl_mask native_set_iopl_mask
+#define set_io_bitmap native_set_io_bitmap
 #endif /* CONFIG_PARAVIRT */
 
 /*
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 99c4d30..5a12c9f 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -30,14 +30,31 @@ static void set_bitmap(unsigned long *bitmap, unsigned int base,
 	}
 }
 
+void native_set_io_bitmap(struct thread_struct *t,
+                          int changed, unsigned long bytes_updated)
+{
+	struct tss_struct *tss;
+
+	if (!bytes_updated)
+		return;
+
+	tss = &__get_cpu_var(init_tss);
+
+	/* Update the TSS: */
+	if (t->io_bitmap_ptr)
+		memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
+	else
+		memset(tss->io_bitmap, 0xff, bytes_updated);
+}
+
 /*
  * this changes the io permissions bitmap in the current task.
  */
 asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 {
 	struct thread_struct *t = &current->thread;
-	struct tss_struct *tss;
 	unsigned int i, max_long, bytes, bytes_updated;
+	int changed;
 
 	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
 		return -EINVAL;
@@ -58,16 +75,18 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 		memset(bitmap, 0xff, IO_BITMAP_BYTES);
 		t->io_bitmap_ptr = bitmap;
 		set_thread_flag(TIF_IO_BITMAP);
-	}
+		changed = 1;
+	} else
+		changed = 0;
 
 	/*
-	 * do it in the per-thread copy and in the TSS ...
+	 * do it in the per-thread copy
 	 *
-	 * Disable preemption via get_cpu() - we must not switch away
+	 * Disable preemption - we must not switch away
 	 * because the ->io_bitmap_max value must match the bitmap
 	 * contents:
 	 */
-	tss = &per_cpu(init_tss, get_cpu());
+	preempt_disable();
 
 	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
 
@@ -85,10 +104,9 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 
 	t->io_bitmap_max = bytes;
 
-	/* Update the TSS: */
-	memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
+	set_io_bitmap(t, changed, bytes_updated);
 
-	put_cpu();
+	preempt_enable();
 
 	return 0;
 }
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index aa34423..1c15178 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -398,6 +398,7 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.swapgs = native_swapgs,
 
 	.set_iopl_mask = native_set_iopl_mask,
+	.set_io_bitmap = native_set_io_bitmap,
 	.io_delay = native_io_delay,
 
 	.start_context_switch = paravirt_nop,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ac25491..174ab04 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -71,16 +71,12 @@ void exit_thread(void)
 	unsigned long *bp = t->io_bitmap_ptr;
 
 	if (bp) {
-		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
-
+		preempt_disable();
 		t->io_bitmap_ptr = NULL;
 		clear_thread_flag(TIF_IO_BITMAP);
-		/*
-		 * Careful, clear this in the TSS too:
-		 */
-		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
+		set_io_bitmap(t, 1, t->io_bitmap_max);
 		t->io_bitmap_max = 0;
-		put_cpu();
+		preempt_enable();
 		kfree(bp);
 	}
 
@@ -211,19 +207,10 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 			hard_enable_TSC();
 	}
 
-	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
-		/*
-		 * Copy the relevant range of the IO bitmap.
-		 * Normally this is 128 bytes or less:
-		 */
-		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
-		       max(prev->io_bitmap_max, next->io_bitmap_max));
-	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
-		/*
-		 * Clear any possible leftover bits:
-		 */
-		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
-	}
+	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP) ||
+	    test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
+		set_io_bitmap(next, 1,
+		              max(prev->io_bitmap_max, next->io_bitmap_max));
 }
 
 int sys_fork(struct pt_regs *regs)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 213e1ba..61ce3ae 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -568,6 +568,21 @@ static void xen_set_iopl_mask(unsigned mask)
 	HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
 }
 
+static void xen_set_io_bitmap(struct thread_struct *thread,
+                              int changed, unsigned long bytes_updated)
+{
+	struct physdev_set_iobitmap set_iobitmap;
+
+	if (!changed)
+		return;
+
+	set_xen_guest_handle(set_iobitmap.bitmap,
+	                     (char *)thread->io_bitmap_ptr);
+	set_iobitmap.nr_ports = thread->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
+	WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap,
+	                              &set_iobitmap));
+}
+
 static void xen_io_delay(void)
 {
 }
@@ -861,6 +876,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.load_sp0 = xen_load_sp0,
 
 	.set_iopl_mask = xen_set_iopl_mask,
+	.set_io_bitmap = xen_set_io_bitmap,
 	.io_delay = xen_io_delay,
 
 	/* Xen takes care of %gs when switching to usermode for us */
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 15/15] x86: don't need "changed" parameter for set_io_bitmap()
       [not found]         ` <49C45238.7050007@zytor.com>
                             ` (28 preceding siblings ...)
  2009-03-23 18:09           ` [PATCH 14/15] paravirtualize IO permission bitmap Jeremy Fitzhardinge
@ 2009-03-23 18:09           ` Jeremy Fitzhardinge
  29 siblings, 0 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-03-23 18:09 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: the arch/x86 maintainers, Ingo Molnar, Linux Kernel Mailing List,
	Xen-devel, Jeremy Fitzhardinge

From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

Impact: cleanup

The "changed" parameter isn't really used, so don't bother passing it.
(xen_set_io_bitmap can keep track of it for itself, if it wants.)

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/include/asm/paravirt.h  |    6 +++---
 arch/x86/include/asm/processor.h |    2 +-
 arch/x86/kernel/ioport.c         |    9 +++------
 arch/x86/kernel/process.c        |    4 ++--
 arch/x86/xen/enlighten.c         |    5 +----
 5 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index fd2c31b..56acbe8 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -157,7 +157,7 @@ struct pv_cpu_ops {
 
 	void (*set_iopl_mask)(unsigned mask);
 	void (*set_io_bitmap)(struct thread_struct *thread,
-	                      int changed, unsigned long bytes_updated);
+	                      unsigned long bytes_updated);
 
 	void (*wbinvd)(void);
 	void (*io_delay)(void);
@@ -1005,9 +1005,9 @@ static inline void set_iopl_mask(unsigned mask)
 }
 
 static inline void set_io_bitmap(struct thread_struct *thread,
-                                 int changed, unsigned long bytes_updated)
+                                 unsigned long bytes_updated)
 {
-	PVOP_VCALL3(pv_cpu_ops.set_io_bitmap, thread, changed, bytes_updated);
+	PVOP_VCALL2(pv_cpu_ops.set_io_bitmap, thread, bytes_updated);
 }
 
 /* The paravirtualized I/O functions */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 0ed4cba..15d0769 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -539,7 +539,7 @@ static inline void native_set_iopl_mask(unsigned mask)
 }
 
 extern void native_set_io_bitmap(struct thread_struct *thread,
-                                 int changed, unsigned long updated_bytes);
+                                 unsigned long updated_bytes);
 
 static inline void
 native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 5a12c9f..642660f 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -31,7 +31,7 @@ static void set_bitmap(unsigned long *bitmap, unsigned int base,
 }
 
 void native_set_io_bitmap(struct thread_struct *t,
-                          int changed, unsigned long bytes_updated)
+                          unsigned long bytes_updated)
 {
 	struct tss_struct *tss;
 
@@ -54,7 +54,6 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 {
 	struct thread_struct *t = &current->thread;
 	unsigned int i, max_long, bytes, bytes_updated;
-	int changed;
 
 	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
 		return -EINVAL;
@@ -75,9 +74,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 		memset(bitmap, 0xff, IO_BITMAP_BYTES);
 		t->io_bitmap_ptr = bitmap;
 		set_thread_flag(TIF_IO_BITMAP);
-		changed = 1;
-	} else
-		changed = 0;
+	}
 
 	/*
 	 * do it in the per-thread copy
@@ -104,7 +101,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 
 	t->io_bitmap_max = bytes;
 
-	set_io_bitmap(t, changed, bytes_updated);
+	set_io_bitmap(t, bytes_updated);
 
 	preempt_enable();
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 174ab04..f1be97f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -74,7 +74,7 @@ void exit_thread(void)
 		preempt_disable();
 		t->io_bitmap_ptr = NULL;
 		clear_thread_flag(TIF_IO_BITMAP);
-		set_io_bitmap(t, 1, t->io_bitmap_max);
+		set_io_bitmap(t, t->io_bitmap_max);
 		t->io_bitmap_max = 0;
 		preempt_enable();
 		kfree(bp);
@@ -209,7 +209,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 
 	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP) ||
 	    test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
-		set_io_bitmap(next, 1,
+		set_io_bitmap(next,
 		              max(prev->io_bitmap_max, next->io_bitmap_max));
 }
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 61ce3ae..f673cd8 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -569,13 +569,10 @@ static void xen_set_iopl_mask(unsigned mask)
 }
 
 static void xen_set_io_bitmap(struct thread_struct *thread,
-                              int changed, unsigned long bytes_updated)
+                              unsigned long bytes_updated)
 {
 	struct physdev_set_iobitmap set_iobitmap;
 
-	if (!changed)
-		return;
-
 	set_xen_guest_handle(set_iobitmap.bitmap,
 	                     (char *)thread->io_bitmap_ptr);
 	set_iobitmap.nr_ports = thread->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2009-03-23 18:15 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <49C28AC2.4010407@goop.org>
     [not found] ` <49C29381.9050201@zytor.com>
     [not found]   ` <49C29983.3040305@goop.org>
     [not found]     ` <49C2ABCA.6010009@zytor.com>
     [not found]       ` <49C43D31.7040805@goop.org>
     [not found]         ` <49C45238.7050007@zytor.com>
2009-03-23 17:38           ` [GIT PULL] x86/paravirt: allow preemption while doing lazy mmu update Jeremy Fitzhardinge
2009-03-23 17:45           ` [GIT PULL] core Xen updates for 2.6.30 Jeremy Fitzhardinge
2009-03-23 17:55           ` [GIT PULL] xen domU control interfaces Jeremy Fitzhardinge
2009-03-23 17:55           ` [PATCH 01/11] xen: add irq_from_evtchn Jeremy Fitzhardinge
2009-03-23 17:55           ` [PATCH 02/11] xen: add /dev/xen/evtchn driver Jeremy Fitzhardinge
2009-03-23 17:55           ` [PATCH 03/11] xen: export ioctl headers to userspace Jeremy Fitzhardinge
2009-03-23 17:55           ` [PATCH 04/11] xen/dev-evtchn: clean up locking in evtchn Jeremy Fitzhardinge
2009-03-23 17:55           ` [PATCH 05/11] xen: add "capabilities" file Jeremy Fitzhardinge
2009-03-23 17:55           ` [PATCH 06/11] xen: add /sys/hypervisor support Jeremy Fitzhardinge
2009-03-23 17:55           ` [PATCH 07/11] xen/sys/hypervisor: change writable_pt to features Jeremy Fitzhardinge
2009-03-23 17:55           ` [PATCH 08/11] xen: drop kexec bits from /sys/hypervisor since kexec isn't implemented yet Jeremy Fitzhardinge
2009-03-23 17:55           ` [PATCH 09/11] xen: remove suspend_cancel hook Jeremy Fitzhardinge
2009-03-23 17:55           ` [PATCH 10/11] xen: use device model for suspending xenbus devices Jeremy Fitzhardinge
2009-03-23 17:55           ` [PATCH 11/11] xen/xenbus: export xenbus_dev_changed Jeremy Fitzhardinge
2009-03-23 18:09           ` [GIT PULL] xen/dom0: core dom0 support Jeremy Fitzhardinge
2009-03-23 18:09           ` [PATCH 01/15] xen dom0: Make hvc_xen console work for dom0 Jeremy Fitzhardinge
2009-03-23 18:09           ` [PATCH 02/15] xen dom0: Initialize xenbus " Jeremy Fitzhardinge
2009-03-23 18:09           ` [PATCH 03/15] xen dom0: Set up basic IO permissions " Jeremy Fitzhardinge
2009-03-23 18:09           ` [PATCH 04/15] xen dom0: Add support for the platform_ops hypercall Jeremy Fitzhardinge
2009-03-23 18:09           ` [PATCH 05/15] xen mtrr: Add mtrr_ops support for Xen mtrr Jeremy Fitzhardinge
2009-03-23 18:09           ` [PATCH 06/15] xen: disable PAT Jeremy Fitzhardinge
2009-03-23 18:09           ` [PATCH 07/15] xen/dom0: use _PAGE_IOMAP in ioremap to do machine mappings Jeremy Fitzhardinge
2009-03-23 18:09           ` [PATCH 08/15] xen/dom0: Use host E820 map Jeremy Fitzhardinge
2009-03-23 18:09           ` [PATCH 09/15] xen: implement XENMEM_machphys_mapping Jeremy Fitzhardinge
2009-03-23 18:09           ` [PATCH 10/15] xen: clear reserved bits in l3 entries given in the initial pagetables Jeremy Fitzhardinge
2009-03-23 18:09           ` [PATCH 11/15] xen: allow enable use of VGA console on dom0 Jeremy Fitzhardinge
2009-03-23 18:09           ` [PATCH 12/15] xen/dom0: add XEN_DOM0 config option Jeremy Fitzhardinge
2009-03-23 18:09           ` [PATCH 13/15] x86: make /dev/mem mappings _PAGE_IOMAP Jeremy Fitzhardinge
2009-03-23 18:09           ` [PATCH 14/15] paravirtualize IO permission bitmap Jeremy Fitzhardinge
2009-03-23 18:09           ` [PATCH 15/15] x86: don't need "changed" parameter for set_io_bitmap() Jeremy Fitzhardinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).