linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch] paravirt: isolate module ops
@ 2007-01-06  0:07 Ingo Molnar
  2007-01-06  0:31 ` Zachary Amsden
  2007-01-06  1:02 ` Zachary Amsden
  0 siblings, 2 replies; 23+ messages in thread
From: Ingo Molnar @ 2007-01-06  0:07 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, Rusty Russell, Zachary Amsden, Arjan van de Ven,
	Adrian Bunk

Subject: [patch] paravirt: isolate module ops
From: Ingo Molnar <mingo@elte.hu>

only export those operations to modules that have been available to them 
historically: irq disable/enable, io-delay, udelay, etc.

this isolates that functionality from other paravirtualization 
functionality that modules have no business messing with.

boot and build tested with CONFIG_PARAVIRT=y.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/i386/kernel/paravirt.c |   41 +++++++++++++++++++++++++++
 include/asm-i386/delay.h    |    4 +-
 include/asm-i386/paravirt.h |   65 ++++++++++++++++++++++----------------------
 3 files changed, 75 insertions(+), 35 deletions(-)

Index: linux/arch/i386/kernel/paravirt.c
===================================================================
--- linux.orig/arch/i386/kernel/paravirt.c
+++ linux/arch/i386/kernel/paravirt.c
@@ -492,6 +492,7 @@ struct paravirt_ops paravirt_ops = {
 
  	.patch = native_patch,
 	.banner = default_banner,
+
 	.arch_setup = native_nop,
 	.memory_setup = machine_specific_memory_setup,
 	.get_wallclock = native_get_wallclock,
@@ -566,4 +567,42 @@ struct paravirt_ops paravirt_ops = {
 	.irq_enable_sysexit = native_irq_enable_sysexit,
 	.iret = native_iret,
 };
-EXPORT_SYMBOL(paravirt_ops);
+
+/*
+ * These are exported to modules:
+ */
+struct paravirt_ops paravirt_mod_ops = {
+	.name = "bare hardware",
+	.paravirt_enabled = 0,
+	.kernel_rpl = 0,
+
+ 	.patch = native_patch,
+	.banner = default_banner,
+
+	.save_fl = native_save_fl,
+	.restore_fl = native_restore_fl,
+	.irq_disable = native_irq_disable,
+	.irq_enable = native_irq_enable,
+
+	.cpuid = native_cpuid,
+
+	.read_msr = native_read_msr,
+	.write_msr = native_write_msr,
+
+	.read_tsc = native_read_tsc,
+	.read_pmc = native_read_pmc,
+
+	.io_delay = native_io_delay,
+	.const_udelay = __const_udelay,
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	.apic_write = native_apic_write,
+	.apic_write_atomic = native_apic_write_atomic,
+	.apic_read = native_apic_read,
+#endif
+
+	.flush_tlb_user = native_flush_tlb,
+	.flush_tlb_kernel = native_flush_tlb_global,
+	.flush_tlb_single = native_flush_tlb_single,
+};
+EXPORT_SYMBOL(paravirt_mod_ops);
Index: linux/include/asm-i386/delay.h
===================================================================
--- linux.orig/include/asm-i386/delay.h
+++ linux/include/asm-i386/delay.h
@@ -17,9 +17,9 @@ extern void __const_udelay(unsigned long
 extern void __delay(unsigned long loops);
 
 #if defined(CONFIG_PARAVIRT) && !defined(USE_REAL_TIME_DELAY)
-#define udelay(n) paravirt_ops.const_udelay((n) * 0x10c7ul)
+#define udelay(n) paravirt_mod_ops.const_udelay((n) * 0x10c7ul)
 
-#define ndelay(n) paravirt_ops.const_udelay((n) * 5ul)
+#define ndelay(n) paravirt_mod_ops.const_udelay((n) * 5ul)
 
 #else /* !PARAVIRT || USE_REAL_TIME_DELAY */
 
Index: linux/include/asm-i386/paravirt.h
===================================================================
--- linux.orig/include/asm-i386/paravirt.h
+++ linux/include/asm-i386/paravirt.h
@@ -151,8 +151,9 @@ struct paravirt_ops
 		__attribute__((__section__(".paravirtprobe"))) = fn
 
 extern struct paravirt_ops paravirt_ops;
+extern struct paravirt_ops paravirt_mod_ops;
 
-#define paravirt_enabled() (paravirt_ops.paravirt_enabled)
+#define paravirt_enabled() (paravirt_mod_ops.paravirt_enabled)
 
 static inline void load_esp0(struct tss_struct *tss,
 			     struct thread_struct *thread)
@@ -180,7 +181,7 @@ static inline void do_time_init(void)
 static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
 			   unsigned int *ecx, unsigned int *edx)
 {
-	paravirt_ops.cpuid(eax, ebx, ecx, edx);
+	paravirt_mod_ops.cpuid(eax, ebx, ecx, edx);
 }
 
 /*
@@ -219,52 +220,52 @@ static inline void halt(void)
 
 #define rdmsr(msr,val1,val2) do {				\
 	int _err;						\
-	u64 _l = paravirt_ops.read_msr(msr,&_err);		\
+	u64 _l = paravirt_mod_ops.read_msr(msr,&_err);		\
 	val1 = (u32)_l;						\
 	val2 = _l >> 32;					\
 } while(0)
 
 #define wrmsr(msr,val1,val2) do {				\
 	u64 _l = ((u64)(val2) << 32) | (val1);			\
-	paravirt_ops.write_msr((msr), _l);			\
+	paravirt_mod_ops.write_msr((msr), _l);			\
 } while(0)
 
 #define rdmsrl(msr,val) do {					\
 	int _err;						\
-	val = paravirt_ops.read_msr((msr),&_err);		\
+	val = paravirt_mod_ops.read_msr((msr),&_err);		\
 } while(0)
 
-#define wrmsrl(msr,val) (paravirt_ops.write_msr((msr),(val)))
+#define wrmsrl(msr,val) (paravirt_mod_ops.write_msr((msr),(val)))
 #define wrmsr_safe(msr,a,b) ({					\
 	u64 _l = ((u64)(b) << 32) | (a);			\
-	paravirt_ops.write_msr((msr),_l);			\
+	paravirt_mod_ops.write_msr((msr),_l);			\
 })
 
 /* rdmsr with exception handling */
 #define rdmsr_safe(msr,a,b) ({					\
 	int _err;						\
-	u64 _l = paravirt_ops.read_msr(msr,&_err);		\
+	u64 _l = paravirt_mod_ops.read_msr(msr,&_err);		\
 	(*a) = (u32)_l;						\
 	(*b) = _l >> 32;					\
 	_err; })
 
 #define rdtsc(low,high) do {					\
-	u64 _l = paravirt_ops.read_tsc();			\
+	u64 _l = paravirt_mod_ops.read_tsc();			\
 	low = (u32)_l;						\
 	high = _l >> 32;					\
 } while(0)
 
 #define rdtscl(low) do {					\
-	u64 _l = paravirt_ops.read_tsc();			\
+	u64 _l = paravirt_mod_ops.read_tsc();			\
 	low = (int)_l;						\
 } while(0)
 
-#define rdtscll(val) (val = paravirt_ops.read_tsc())
+#define rdtscll(val) (val = paravirt_mod_ops.read_tsc())
 
 #define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
 
 #define rdpmc(counter,low,high) do {				\
-	u64 _l = paravirt_ops.read_pmc();			\
+	u64 _l = paravirt_mod_ops.read_pmc();			\
 	low = (u32)_l;						\
 	high = _l >> 32;					\
 } while(0)
@@ -287,11 +288,11 @@ static inline void halt(void)
 
 /* The paravirtualized I/O functions */
 static inline void slow_down_io(void) {
-	paravirt_ops.io_delay();
+	paravirt_mod_ops.io_delay();
 #ifdef REALLY_SLOW_IO
-	paravirt_ops.io_delay();
-	paravirt_ops.io_delay();
-	paravirt_ops.io_delay();
+	paravirt_mod_ops.io_delay();
+	paravirt_mod_ops.io_delay();
+	paravirt_mod_ops.io_delay();
 #endif
 }
 
@@ -301,24 +302,24 @@ static inline void slow_down_io(void) {
  */
 static inline void apic_write(unsigned long reg, unsigned long v)
 {
-	paravirt_ops.apic_write(reg,v);
+	paravirt_mod_ops.apic_write(reg,v);
 }
 
 static inline void apic_write_atomic(unsigned long reg, unsigned long v)
 {
-	paravirt_ops.apic_write_atomic(reg,v);
+	paravirt_mod_ops.apic_write_atomic(reg,v);
 }
 
 static inline unsigned long apic_read(unsigned long reg)
 {
-	return paravirt_ops.apic_read(reg);
+	return paravirt_mod_ops.apic_read(reg);
 }
 #endif
 
 
-#define __flush_tlb() paravirt_ops.flush_tlb_user()
-#define __flush_tlb_global() paravirt_ops.flush_tlb_kernel()
-#define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr)
+#define __flush_tlb()			paravirt_mod_ops.flush_tlb_user()
+#define __flush_tlb_global()		paravirt_mod_ops.flush_tlb_kernel()
+#define __flush_tlb_single(addr)	paravirt_mod_ops.flush_tlb_single(addr)
 
 static inline void set_pte(pte_t *ptep, pte_t pteval)
 {
@@ -397,7 +398,7 @@ static inline unsigned long __raw_local_
 					   "call *%1;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_SAVE_FLAGS, CLBR_NONE)
-			     : "=a"(f): "m"(paravirt_ops.save_fl)
+			     : "=a"(f): "m"(paravirt_mod_ops.save_fl)
 			     : "memory", "cc");
 	return f;
 }
@@ -408,7 +409,7 @@ static inline void raw_local_irq_restore
 					   "call *%1;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_RESTORE_FLAGS, CLBR_EAX)
-			     : "=a"(f) : "m" (paravirt_ops.restore_fl), "0"(f)
+			     : "=a"(f) : "m" (paravirt_mod_ops.restore_fl), "0"(f)
 			     : "memory", "cc");
 }
 
@@ -418,7 +419,7 @@ static inline void raw_local_irq_disable
 					   "call *%0;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_IRQ_DISABLE, CLBR_EAX)
-			     : : "m" (paravirt_ops.irq_disable)
+			     : : "m" (paravirt_mod_ops.irq_disable)
 			     : "memory", "eax", "cc");
 }
 
@@ -428,7 +429,7 @@ static inline void raw_local_irq_enable(
 					   "call *%0;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_IRQ_ENABLE, CLBR_EAX)
-			     : : "m" (paravirt_ops.irq_enable)
+			     : : "m" (paravirt_mod_ops.irq_enable)
 			     : "memory", "eax", "cc");
 }
 
@@ -443,19 +444,19 @@ static inline unsigned long __raw_local_
 					  PARAVIRT_SAVE_FLAGS_IRQ_DISABLE,
 					  CLBR_NONE)
 			     : "=a"(f)
-			     : "m" (paravirt_ops.save_fl),
-			       "m" (paravirt_ops.irq_disable)
+			     : "m" (paravirt_mod_ops.save_fl),
+			       "m" (paravirt_mod_ops.irq_disable)
 			     : "memory", "cc");
 	return f;
 }
 
 #define CLI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;"		\
-		     "call *paravirt_ops+%c[irq_disable];"		\
+		     "call *paravirt_mod_ops+%c[irq_disable];"		\
 		     "popl %%edx; popl %%ecx",				\
 		     PARAVIRT_IRQ_DISABLE, CLBR_EAX)
 
 #define STI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;"		\
-		     "call *paravirt_ops+%c[irq_enable];"		\
+		     "call *paravirt_mod_ops+%c[irq_enable];"		\
 		     "popl %%edx; popl %%ecx",				\
 		     PARAVIRT_IRQ_ENABLE, CLBR_EAX)
 #define CLI_STI_CLOBBERS , "%eax"
@@ -484,13 +485,13 @@ static inline unsigned long __raw_local_
 #define DISABLE_INTERRUPTS(clobbers)			\
 	PARA_PATCH(PARAVIRT_IRQ_DISABLE, clobbers,	\
 	pushl %ecx; pushl %edx;				\
-	call *paravirt_ops+PARAVIRT_irq_disable;	\
+	call *paravirt_mod_ops+PARAVIRT_irq_disable;	\
 	popl %edx; popl %ecx)				\
 
 #define ENABLE_INTERRUPTS(clobbers)			\
 	PARA_PATCH(PARAVIRT_IRQ_ENABLE, clobbers,	\
 	pushl %ecx; pushl %edx;				\
-	call *%cs:paravirt_ops+PARAVIRT_irq_enable;	\
+	call *%cs:paravirt_mod_ops+PARAVIRT_irq_enable;	\
 	popl %edx; popl %ecx)
 
 #define ENABLE_INTERRUPTS_SYSEXIT			\

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06  0:07 [patch] paravirt: isolate module ops Ingo Molnar
@ 2007-01-06  0:31 ` Zachary Amsden
  2007-01-06  6:25   ` Rusty Russell
  2007-01-06  1:02 ` Zachary Amsden
  1 sibling, 1 reply; 23+ messages in thread
From: Zachary Amsden @ 2007-01-06  0:31 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, linux-kernel, Rusty Russell, Arjan van de Ven,
	Adrian Bunk

Ingo Molnar wrote:
> Subject: [patch] paravirt: isolate module ops
> From: Ingo Molnar <mingo@elte.hu>
>
> only export those operations to modules that have been available to them 
> historically: irq disable/enable, io-delay, udelay, etc.
>
> this isolates that functionality from other paravirtualization 
> functionality that modules have no business messing with.
>
> boot and build tested with CONFIG_PARAVIRT=y.
>
> Signed-off-by: Ingo Molnar <mingo@elte.hu>
> ---
>  arch/i386/kernel/paravirt.c |   41 +++++++++++++++++++++++++++
>  include/asm-i386/delay.h    |    4 +-
>  include/asm-i386/paravirt.h |   65 ++++++++++++++++++++++----------------------
>  3 files changed, 75 insertions(+), 35 deletions(-)
>
> Index: linux/arch/i386/kernel/paravirt.c
> ===================================================================
> --- linux.orig/arch/i386/kernel/paravirt.c
> +++ linux/arch/i386/kernel/paravirt.c
> @@ -492,6 +492,7 @@ struct paravirt_ops paravirt_ops = {
>  
>   	.patch = native_patch,
>  	.banner = default_banner,
> +
>  	.arch_setup = native_nop,
>  	.memory_setup = machine_specific_memory_setup,
>  	.get_wallclock = native_get_wallclock,
> @@ -566,4 +567,42 @@ struct paravirt_ops paravirt_ops = {
>  	.irq_enable_sysexit = native_irq_enable_sysexit,
>  	.iret = native_iret,
>  };
> -EXPORT_SYMBOL(paravirt_ops);
> +
> +/*
> + * These are exported to modules:
> + */
> +struct paravirt_ops paravirt_mod_ops = {
> +	.name = "bare hardware",
> +	.paravirt_enabled = 0,
> +	.kernel_rpl = 0,
> +
> + 	.patch = native_patch,
>   

I don't think you want to leave that one... patching the kernel isn't 
something modules should be doing.

Perhaps a separate structure is better for module ops - this seems error 
prone to calling the wrong one of paravirt_ops vs. paravirt_mod_ops, 
also error prone to set them up in the code which sets paravirt_ops for 
each hypervisor.  Although that does require more dissection, it does 
make sure everyone gets it right, and makes the interface very 
explicitly clear in the header file, which is nice.

If you feel strongly about this, I suggest you push it upstream now, not 
into Andrew's tree (it doesn't apply cleanly there, and breaks the VMI 
patches which are in there).  But this is no problem, I can fix that up 
easily once you get it in.

Zach

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06  0:07 [patch] paravirt: isolate module ops Ingo Molnar
  2007-01-06  0:31 ` Zachary Amsden
@ 2007-01-06  1:02 ` Zachary Amsden
  2007-01-06  2:07   ` Arjan van de Ven
  1 sibling, 1 reply; 23+ messages in thread
From: Zachary Amsden @ 2007-01-06  1:02 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, linux-kernel, Rusty Russell, Arjan van de Ven,
	Adrian Bunk

[-- Attachment #1: Type: text/plain, Size: 908 bytes --]

Ingo Molnar wrote:
> Subject: [patch] paravirt: isolate module ops
> From: Ingo Molnar <mingo@elte.hu>
>
> only export those operations to modules that have been available to them 
> historically: irq disable/enable, io-delay, udelay, etc.
>
> this isolates that functionality from other paravirtualization 
> functionality that modules have no business messing with.
>
> boot and build tested with CONFIG_PARAVIRT=y.
>   

I would suggest a slightly different carving.  For one, no TLB flushes.  
If you can't modify PTEs, why do you need to have TLB flushes?  And I 
would allow CR0 read / write for code which saves and restores FPU state 
- possibly even debug register access, although any code which touches 
DRs could be doing something sneaky.  I'm on the fence about that one.

Here is a partially tested patch against the -mm tree.  Let me know what 
you think of this slightly different approach.

[-- Attachment #2: ingo-isolation --]
[-- Type: text/plain, Size: 14420 bytes --]

diff -r 3242f865ce8e arch/i386/kernel/paravirt.c
--- a/arch/i386/kernel/paravirt.c	Thu Jan 04 20:17:02 2007 -0800
+++ b/arch/i386/kernel/paravirt.c	Fri Jan 05 16:48:25 2007 -0800
@@ -492,6 +492,7 @@ struct paravirt_ops paravirt_ops = {
 
  	.patch = native_patch,
 	.banner = default_banner,
+
 	.arch_setup = native_nop,
 	.memory_setup = machine_specific_memory_setup,
 	.get_wallclock = native_get_wallclock,
@@ -577,4 +578,42 @@ struct paravirt_ops paravirt_ops = {
 
 	.startup_ipi_hook = (void *)native_nop,
 };
-EXPORT_SYMBOL(paravirt_ops);
+
+/*
+ * These are exported to modules:
+ */
+struct paravirt_mod_ops paravirt_mod_ops = {
+	.name = "bare hardware",
+	.paravirt_enabled = 0,
+	.kernel_rpl = 0,
+
+ 	.patch = native_patch,
+	.banner = default_banner,
+
+	.save_fl = native_save_fl,
+	.restore_fl = native_restore_fl,
+	.irq_disable = native_irq_disable,
+	.irq_enable = native_irq_enable,
+
+	.cpuid = native_cpuid,
+
+	.read_msr = native_read_msr,
+	.write_msr = native_write_msr,
+
+	.read_tsc = native_read_tsc,
+	.read_pmc = native_read_pmc,
+
+	.io_delay = native_io_delay,
+	.const_udelay = __const_udelay,
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	.apic_write = native_apic_write,
+	.apic_write_atomic = native_apic_write_atomic,
+	.apic_read = native_apic_read,
+#endif
+
+	.flush_tlb_user = native_flush_tlb,
+	.flush_tlb_kernel = native_flush_tlb_global,
+	.flush_tlb_single = native_flush_tlb_single,
+};
+EXPORT_SYMBOL(paravirt_mod_ops);
diff -r 3242f865ce8e include/asm-i386/delay.h
--- a/include/asm-i386/delay.h	Thu Jan 04 20:17:02 2007 -0800
+++ b/include/asm-i386/delay.h	Fri Jan 05 16:11:43 2007 -0800
@@ -17,9 +17,9 @@ extern void __delay(unsigned long loops)
 extern void __delay(unsigned long loops);
 
 #if defined(CONFIG_PARAVIRT) && !defined(USE_REAL_TIME_DELAY)
-#define udelay(n) paravirt_ops.const_udelay((n) * 0x10c7ul)
+#define udelay(n) paravirt_mod_ops.const_udelay((n) * 0x10c7ul)
 
-#define ndelay(n) paravirt_ops.const_udelay((n) * 5ul)
+#define ndelay(n) paravirt_mod_ops.const_udelay((n) * 5ul)
 
 #else /* !PARAVIRT || USE_REAL_TIME_DELAY */
 
diff -r 3242f865ce8e include/asm-i386/paravirt.h
--- a/include/asm-i386/paravirt.h	Thu Jan 04 20:17:02 2007 -0800
+++ b/include/asm-i386/paravirt.h	Fri Jan 05 16:51:10 2007 -0800
@@ -29,12 +29,52 @@ struct Xgt_desc_struct;
 struct Xgt_desc_struct;
 struct tss_struct;
 struct mm_struct;
-struct paravirt_ops
+struct paravirt_mod_ops
 {
 	unsigned int kernel_rpl;
  	int paravirt_enabled;
 	const char *name;
 
+	/* All the function pointers here are declared as "fastcall"
+	   so that we get a specific register-based calling
+	   convention.  This makes it easier to implement inline
+	   assembler replacements. */
+
+	void (fastcall *cpuid)(unsigned int *eax, unsigned int *ebx,
+		      unsigned int *ecx, unsigned int *edx);
+
+	/* CLTS / cr0 may be used for math state save / restore */
+	void (fastcall *clts)(void);
+	unsigned long (fastcall *read_cr0)(void);
+	void (fastcall *write_cr0)(unsigned long);
+
+	unsigned long (fastcall *save_fl)(void);
+	void (fastcall *restore_fl)(unsigned long);
+	void (fastcall *irq_disable)(void);
+	void (fastcall *irq_enable)(void);
+
+	/* err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
+	u64 (fastcall *read_msr)(unsigned int msr, int *err);
+	int (fastcall *write_msr)(unsigned int msr, u64 val);
+
+	u64 (fastcall *read_tsc)(void);
+	u64 (fastcall *read_pmc)(void);
+
+	void (fastcall *io_delay)(void);
+	void (*const_udelay)(unsigned long loops);
+
+	/* May be needed by device drivers to flush cache */
+	void (fastcall *wbinvd)(void);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	void (fastcall *apic_write)(unsigned long reg, unsigned long v);
+	void (fastcall *apic_write_atomic)(unsigned long reg, unsigned long v);
+	unsigned long (fastcall *apic_read)(unsigned long reg);
+#endif
+};
+
+struct paravirt_ops
+{
 	/*
 	 * Patch may replace one of the defined code sequences with arbitrary
 	 * code, subject to the same register constraints.  This generally
@@ -54,21 +94,8 @@ struct paravirt_ops
 	int (*set_wallclock)(unsigned long);
 	void (*time_init)(void);
 
-	/* All the function pointers here are declared as "fastcall"
-	   so that we get a specific register-based calling
-	   convention.  This makes it easier to implement inline
-	   assembler replacements. */
-
-	void (fastcall *cpuid)(unsigned int *eax, unsigned int *ebx,
-		      unsigned int *ecx, unsigned int *edx);
-
 	unsigned long (fastcall *get_debugreg)(int regno);
 	void (fastcall *set_debugreg)(int regno, unsigned long value);
-
-	void (fastcall *clts)(void);
-
-	unsigned long (fastcall *read_cr0)(void);
-	void (fastcall *write_cr0)(unsigned long);
 
 	unsigned long (fastcall *read_cr2)(void);
 	void (fastcall *write_cr2)(unsigned long);
@@ -80,20 +107,8 @@ struct paravirt_ops
 	unsigned long (fastcall *read_cr4)(void);
 	void (fastcall *write_cr4)(unsigned long);
 
-	unsigned long (fastcall *save_fl)(void);
-	void (fastcall *restore_fl)(unsigned long);
-	void (fastcall *irq_disable)(void);
-	void (fastcall *irq_enable)(void);
 	void (fastcall *safe_halt)(void);
 	void (fastcall *halt)(void);
-	void (fastcall *wbinvd)(void);
-
-	/* err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
-	u64 (fastcall *read_msr)(unsigned int msr, int *err);
-	int (fastcall *write_msr)(unsigned int msr, u64 val);
-
-	u64 (fastcall *read_tsc)(void);
-	u64 (fastcall *read_pmc)(void);
 
 	void (fastcall *load_tr_desc)(void);
 	void (fastcall *load_gdt)(const struct Xgt_desc_struct *);
@@ -114,13 +129,7 @@ struct paravirt_ops
 
 	void (fastcall *set_iopl_mask)(unsigned mask);
 
-	void (fastcall *io_delay)(void);
-	void (*const_udelay)(unsigned long loops);
-
 #ifdef CONFIG_X86_LOCAL_APIC
-	void (fastcall *apic_write)(unsigned long reg, unsigned long v);
-	void (fastcall *apic_write_atomic)(unsigned long reg, unsigned long v);
-	unsigned long (fastcall *apic_read)(unsigned long reg);
 	void (*setup_boot_clock)(void);
 	void (*setup_secondary_clock)(void);
 #endif
@@ -163,8 +172,9 @@ struct paravirt_ops
 		__attribute__((__section__(".paravirtprobe"))) = fn
 
 extern struct paravirt_ops paravirt_ops;
-
-#define paravirt_enabled() (paravirt_ops.paravirt_enabled)
+extern struct paravirt_mod_ops paravirt_mod_ops;
+
+#define paravirt_enabled() (paravirt_mod_ops.paravirt_enabled)
 
 static inline void load_esp0(struct tss_struct *tss,
 			     struct thread_struct *thread)
@@ -192,7 +202,7 @@ static inline void __cpuid(unsigned int 
 static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
 			   unsigned int *ecx, unsigned int *edx)
 {
-	paravirt_ops.cpuid(eax, ebx, ecx, edx);
+	paravirt_mod_ops.cpuid(eax, ebx, ecx, edx);
 }
 
 /*
@@ -201,10 +211,10 @@ static inline void __cpuid(unsigned int 
 #define get_debugreg(var, reg) var = paravirt_ops.get_debugreg(reg)
 #define set_debugreg(val, reg) paravirt_ops.set_debugreg(reg, val)
 
-#define clts() paravirt_ops.clts()
-
-#define read_cr0() paravirt_ops.read_cr0()
-#define write_cr0(x) paravirt_ops.write_cr0(x)
+#define clts() paravirt_mod_ops.clts()
+
+#define read_cr0() paravirt_mod_ops.read_cr0()
+#define write_cr0(x) paravirt_mod_ops.write_cr0(x)
 
 #define read_cr2() paravirt_ops.read_cr2()
 #define write_cr2(x) paravirt_ops.write_cr2(x)
@@ -225,58 +235,58 @@ static inline void halt(void)
 {
 	paravirt_ops.safe_halt();
 }
-#define wbinvd() paravirt_ops.wbinvd()
-
-#define get_kernel_rpl()  (paravirt_ops.kernel_rpl)
+#define wbinvd() paravirt_mod_ops.wbinvd()
+
+#define get_kernel_rpl()  (paravirt_mod_ops.kernel_rpl)
 
 #define rdmsr(msr,val1,val2) do {				\
 	int _err;						\
-	u64 _l = paravirt_ops.read_msr(msr,&_err);		\
+	u64 _l = paravirt_mod_ops.read_msr(msr,&_err);		\
 	val1 = (u32)_l;						\
 	val2 = _l >> 32;					\
 } while(0)
 
 #define wrmsr(msr,val1,val2) do {				\
 	u64 _l = ((u64)(val2) << 32) | (val1);			\
-	paravirt_ops.write_msr((msr), _l);			\
+	paravirt_mod_ops.write_msr((msr), _l);			\
 } while(0)
 
 #define rdmsrl(msr,val) do {					\
 	int _err;						\
-	val = paravirt_ops.read_msr((msr),&_err);		\
-} while(0)
-
-#define wrmsrl(msr,val) (paravirt_ops.write_msr((msr),(val)))
+	val = paravirt_mod_ops.read_msr((msr),&_err);		\
+} while(0)
+
+#define wrmsrl(msr,val) (paravirt_mod_ops.write_msr((msr),(val)))
 #define wrmsr_safe(msr,a,b) ({					\
 	u64 _l = ((u64)(b) << 32) | (a);			\
-	paravirt_ops.write_msr((msr),_l);			\
+	paravirt_mod_ops.write_msr((msr),_l);			\
 })
 
 /* rdmsr with exception handling */
 #define rdmsr_safe(msr,a,b) ({					\
 	int _err;						\
-	u64 _l = paravirt_ops.read_msr(msr,&_err);		\
+	u64 _l = paravirt_mod_ops.read_msr(msr,&_err);		\
 	(*a) = (u32)_l;						\
 	(*b) = _l >> 32;					\
 	_err; })
 
 #define rdtsc(low,high) do {					\
-	u64 _l = paravirt_ops.read_tsc();			\
+	u64 _l = paravirt_mod_ops.read_tsc();			\
 	low = (u32)_l;						\
 	high = _l >> 32;					\
 } while(0)
 
 #define rdtscl(low) do {					\
-	u64 _l = paravirt_ops.read_tsc();			\
+	u64 _l = paravirt_mod_ops.read_tsc();			\
 	low = (int)_l;						\
 } while(0)
 
-#define rdtscll(val) (val = paravirt_ops.read_tsc())
+#define rdtscll(val) (val = paravirt_mod_ops.read_tsc())
 
 #define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
 
 #define rdpmc(counter,low,high) do {				\
-	u64 _l = paravirt_ops.read_pmc();			\
+	u64 _l = paravirt_mod_ops.read_pmc();			\
 	low = (u32)_l;						\
 	high = _l >> 32;					\
 } while(0)
@@ -299,11 +309,11 @@ static inline void halt(void)
 
 /* The paravirtualized I/O functions */
 static inline void slow_down_io(void) {
-	paravirt_ops.io_delay();
+	paravirt_mod_ops.io_delay();
 #ifdef REALLY_SLOW_IO
-	paravirt_ops.io_delay();
-	paravirt_ops.io_delay();
-	paravirt_ops.io_delay();
+	paravirt_mod_ops.io_delay();
+	paravirt_mod_ops.io_delay();
+	paravirt_mod_ops.io_delay();
 #endif
 }
 
@@ -313,17 +323,17 @@ static inline void slow_down_io(void) {
  */
 static inline void apic_write(unsigned long reg, unsigned long v)
 {
-	paravirt_ops.apic_write(reg,v);
+	paravirt_mod_ops.apic_write(reg,v);
 }
 
 static inline void apic_write_atomic(unsigned long reg, unsigned long v)
 {
-	paravirt_ops.apic_write_atomic(reg,v);
+	paravirt_mod_ops.apic_write_atomic(reg,v);
 }
 
 static inline unsigned long apic_read(unsigned long reg)
 {
-	return paravirt_ops.apic_read(reg);
+	return paravirt_mod_ops.apic_read(reg);
 }
 
 static inline void setup_boot_clock(void)
@@ -447,7 +457,7 @@ static inline unsigned long __raw_local_
 					   "call *%1;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_SAVE_FLAGS, CLBR_NONE)
-			     : "=a"(f): "m"(paravirt_ops.save_fl)
+			     : "=a"(f): "m"(paravirt_mod_ops.save_fl)
 			     : "memory", "cc");
 	return f;
 }
@@ -458,7 +468,7 @@ static inline void raw_local_irq_restore
 					   "call *%1;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_RESTORE_FLAGS, CLBR_EAX)
-			     : "=a"(f) : "m" (paravirt_ops.restore_fl), "0"(f)
+			     : "=a"(f) : "m" (paravirt_mod_ops.restore_fl), "0"(f)
 			     : "memory", "cc");
 }
 
@@ -468,7 +478,7 @@ static inline void raw_local_irq_disable
 					   "call *%0;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_IRQ_DISABLE, CLBR_EAX)
-			     : : "m" (paravirt_ops.irq_disable)
+			     : : "m" (paravirt_mod_ops.irq_disable)
 			     : "memory", "eax", "cc");
 }
 
@@ -478,7 +488,7 @@ static inline void raw_local_irq_enable(
 					   "call *%0;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_IRQ_ENABLE, CLBR_EAX)
-			     : : "m" (paravirt_ops.irq_enable)
+			     : : "m" (paravirt_mod_ops.irq_enable)
 			     : "memory", "eax", "cc");
 }
 
@@ -493,26 +503,26 @@ static inline unsigned long __raw_local_
 					  PARAVIRT_SAVE_FLAGS_IRQ_DISABLE,
 					  CLBR_NONE)
 			     : "=a"(f)
-			     : "m" (paravirt_ops.save_fl),
-			       "m" (paravirt_ops.irq_disable)
+			     : "m" (paravirt_mod_ops.save_fl),
+			       "m" (paravirt_mod_ops.irq_disable)
 			     : "memory", "cc");
 	return f;
 }
 
 #define CLI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;"		\
-		     "call *paravirt_ops+%c[irq_disable];"		\
+		     "call *paravirt_mod_ops+%c[irq_disable];"		\
 		     "popl %%edx; popl %%ecx",				\
 		     PARAVIRT_IRQ_DISABLE, CLBR_EAX)
 
 #define STI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;"		\
-		     "call *paravirt_ops+%c[irq_enable];"		\
+		     "call *paravirt_mod_ops+%c[irq_enable];"		\
 		     "popl %%edx; popl %%ecx",				\
 		     PARAVIRT_IRQ_ENABLE, CLBR_EAX)
 #define CLI_STI_CLOBBERS , "%eax"
 #define CLI_STI_INPUT_ARGS \
 	,								\
-	[irq_disable] "i" (offsetof(struct paravirt_ops, irq_disable)),	\
-	[irq_enable] "i" (offsetof(struct paravirt_ops, irq_enable))
+	[irq_disable] "i" (offsetof(struct paravirt_mod_ops, irq_disable)),	\
+	[irq_enable] "i" (offsetof(struct paravirt_mod_ops, irq_enable))
 
 #else  /* __ASSEMBLY__ */
 
@@ -534,13 +544,13 @@ 772:;						\
 #define DISABLE_INTERRUPTS(clobbers)			\
 	PARA_PATCH(PARAVIRT_IRQ_DISABLE, clobbers,	\
 	pushl %ecx; pushl %edx;				\
-	call *paravirt_ops+PARAVIRT_irq_disable;	\
+	call *paravirt_mod_ops+PARAVIRT_irq_disable;	\
 	popl %edx; popl %ecx)				\
 
 #define ENABLE_INTERRUPTS(clobbers)			\
 	PARA_PATCH(PARAVIRT_IRQ_ENABLE, clobbers,	\
 	pushl %ecx; pushl %edx;				\
-	call *%cs:paravirt_ops+PARAVIRT_irq_enable;	\
+	call *%cs:paravirt_mod_ops+PARAVIRT_irq_enable;	\
 	popl %edx; popl %ecx)
 
 #define ENABLE_INTERRUPTS_SYSEXIT			\
@@ -548,7 +558,7 @@ 772:;						\
 	jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit)
 
 #define GET_CR0_INTO_EAX			\
-	call *paravirt_ops+PARAVIRT_read_cr0
+	call *paravirt_mod_ops+PARAVIRT_read_cr0
 
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_PARAVIRT */
diff -r 3242f865ce8e arch/i386/kernel/asm-offsets.c
--- a/arch/i386/kernel/asm-offsets.c	Thu Jan 04 20:17:02 2007 -0800
+++ b/arch/i386/kernel/asm-offsets.c	Fri Jan 05 16:49:15 2007 -0800
@@ -104,11 +104,11 @@ void foo(void)
 
 #ifdef CONFIG_PARAVIRT
 	BLANK();
-	OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled);
-	OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable);
-	OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable);
+	OFFSET(PARAVIRT_enabled, paravirt_mod_ops, paravirt_enabled);
+	OFFSET(PARAVIRT_irq_disable, paravirt_mod_ops, irq_disable);
+	OFFSET(PARAVIRT_irq_enable, paravirt_mod_ops, irq_enable);
 	OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit);
 	OFFSET(PARAVIRT_iret, paravirt_ops, iret);
-	OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0);
+	OFFSET(PARAVIRT_read_cr0, paravirt_mod_ops, read_cr0);
 #endif
 }

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06  1:02 ` Zachary Amsden
@ 2007-01-06  2:07   ` Arjan van de Ven
  2007-01-06  4:41     ` Zachary Amsden
  2007-01-06  5:34     ` Rusty Russell
  0 siblings, 2 replies; 23+ messages in thread
From: Arjan van de Ven @ 2007-01-06  2:07 UTC (permalink / raw)
  To: Zachary Amsden
  Cc: Ingo Molnar, Andrew Morton, linux-kernel, Rusty Russell, Adrian Bunk

> 
> I would suggest a slightly different carving.  For one, no TLB flushes.  
> If you can't modify PTEs, why do you need to have TLB flushes?  And I 
> would allow CR0 read / write for code which saves and restores FPU state 

no that is abstracted away by kernel_fpu_begin/end. Modules have no
business doing that themselves

> - possibly even debug register access, although any code which touches 
> DRs could be doing something sneaky.  I'm on the fence about that one.

lets not allow it at all


-- 
if you want to mail me at work (you don't), use arjan (at) linux.intel.com
Test the interaction between Linux and your BIOS via http://www.linuxfirmwarekit.org


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06  2:07   ` Arjan van de Ven
@ 2007-01-06  4:41     ` Zachary Amsden
  2007-01-06  5:34     ` Rusty Russell
  1 sibling, 0 replies; 23+ messages in thread
From: Zachary Amsden @ 2007-01-06  4:41 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Ingo Molnar, Andrew Morton, linux-kernel, Rusty Russell, Adrian Bunk

Arjan van de Ven wrote:
>> I would suggest a slightly different carving.  For one, no TLB flushes.  
>> If you can't modify PTEs, why do you need to have TLB flushes?  And I 
>> would allow CR0 read / write for code which saves and restores FPU state 
>>     
>
> no that is abstracted away by kernel_fpu_begin/end. Modules have no
> business doing that themselves
>   

As long as they don't rely on inlines for that... checking and 
kernel_fpu_end is inline and uses stts(), which requires CR0 read / 
write.  One can easily imagine binary modules which do use the fpu, and 
these were not broken before, so breaking them now seems the wrong thing 
to do.

I agree on debug registers - anything touching them is way too shady.  
And there is no reason modules should be doing raw page table 
operations, they should use proper mm functions and leave the page 
details to the mm layer, which doesn't do these things inline.

Basically, it is just the things that do get inlined that I think we 
should worry about.  If you all feel strongly that this should be fixed 
in 2.6.20, perhaps the best thing to do is in fact 
EXPORT_SYMBOL_GPL(paravirt_ops), and we can queue up a patch in -mm 
which will export those paravirt_ops required inline by modules for 
2.6.21.  Otherwise, I think there will be too many rejects against the 
paravirt code in Andrew's tree.

Zach

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06  2:07   ` Arjan van de Ven
  2007-01-06  4:41     ` Zachary Amsden
@ 2007-01-06  5:34     ` Rusty Russell
  1 sibling, 0 replies; 23+ messages in thread
From: Rusty Russell @ 2007-01-06  5:34 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Zachary Amsden, Ingo Molnar, Andrew Morton, linux-kernel, Adrian Bunk

On Fri, 2007-01-05 at 18:07 -0800, Arjan van de Ven wrote:
> > 
> > I would suggest a slightly different carving.  For one, no TLB flushes.  
> > If you can't modify PTEs, why do you need to have TLB flushes?  And I 
> > would allow CR0 read / write for code which saves and restores FPU state 
> 
> no that is abstracted away by kernel_fpu_begin/end. Modules have no
> business doing that themselves

Sure, but it'll take some time to fix the raid modules (which are the
ones which abuse this).

I'm testing a patch now, I'll send the clts removal patch on top of that
once it's done.

Rusty.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06  0:31 ` Zachary Amsden
@ 2007-01-06  6:25   ` Rusty Russell
  2007-01-06  7:08     ` Ingo Molnar
  2007-01-06  7:14     ` Ingo Molnar
  0 siblings, 2 replies; 23+ messages in thread
From: Rusty Russell @ 2007-01-06  6:25 UTC (permalink / raw)
  To: Zachary Amsden, Jeremy Fitzhardinge, Chris Wright
  Cc: Ingo Molnar, Andrew Morton, linux-kernel, Arjan van de Ven, Adrian Bunk

On Fri, 2007-01-05 at 16:31 -0800, Zachary Amsden wrote:
> Ingo Molnar wrote:
> > Subject: [patch] paravirt: isolate module ops
> > From: Ingo Molnar <mingo@elte.hu>
> >
> > only export those operations to modules that have been available to them 
> > historically: irq disable/enable, io-delay, udelay, etc.
> >
> > this isolates that functionality from other paravirtualization 
> > functionality that modules have no business messing with.
> >
> > boot and build tested with CONFIG_PARAVIRT=y.
> >
> > Signed-off-by: Ingo Molnar <mingo@elte.hu>
> > ---
> >  arch/i386/kernel/paravirt.c |   41 +++++++++++++++++++++++++++
> >  include/asm-i386/delay.h    |    4 +-
> >  include/asm-i386/paravirt.h |   65 ++++++++++++++++++++++----------------------
> >  3 files changed, 75 insertions(+), 35 deletions(-)
> >
> > Index: linux/arch/i386/kernel/paravirt.c
> > ===================================================================
> > --- linux.orig/arch/i386/kernel/paravirt.c
> > +++ linux/arch/i386/kernel/paravirt.c
> > @@ -492,6 +492,7 @@ struct paravirt_ops paravirt_ops = {
> >  
> >   	.patch = native_patch,
> >  	.banner = default_banner,
> > +
> >  	.arch_setup = native_nop,
> >  	.memory_setup = machine_specific_memory_setup,
> >  	.get_wallclock = native_get_wallclock,
> > @@ -566,4 +567,42 @@ struct paravirt_ops paravirt_ops = {
> >  	.irq_enable_sysexit = native_irq_enable_sysexit,
> >  	.iret = native_iret,
> >  };
> > -EXPORT_SYMBOL(paravirt_ops);
> > +
> > +/*
> > + * These are exported to modules:
> > + */
> > +struct paravirt_ops paravirt_mod_ops = {
> > +	.name = "bare hardware",
> > +	.paravirt_enabled = 0,
> > +	.kernel_rpl = 0,
> > +
> > + 	.patch = native_patch,
> >   
> 
> I don't think you want to leave that one... patching the kernel isn't 
> something modules should be doing.

Yeah, this patch is terrible, but I know why Ingo did it; it's my fault
for not finishing my version yesterday (but allmodconfig takes a while
to build and every change to paravirt.h rebuilds the universe...).

So here's my variant, compile-tested with "make allmodconfig".  Exports
individual functions, some of which can be reduced over time as the
modules involved are whacked into shape.

Note: it reduces the patching space by 1 byte (direct jumps vs indirect
jumps).  If this a problem for any paravirt_ops backend in practice, we
can add noops...

Cheers,
Rusty.
PS.  May break with other configurations...

Name: don't export paravirt_ops structure, do individual functions

Some of the more obscure ones are only used by one or two modules.
We can almost certainly reduce them with more work.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff -r 48f31ae5d7b5 arch/i386/kernel/paravirt.c
--- a/arch/i386/kernel/paravirt.c	Sat Jan 06 10:32:24 2007 +1100
+++ b/arch/i386/kernel/paravirt.c	Sat Jan 06 17:23:12 2007 +1100
@@ -596,6 +596,154 @@ static int __init print_banner(void)
 	return 0;
 }
 core_initcall(print_banner);
+
+unsigned long paravirt_save_flags(void)
+{
+	return paravirt_ops.save_fl();
+}
+EXPORT_SYMBOL(paravirt_save_flags);
+
+void paravirt_restore_flags(unsigned long flags)
+{
+	paravirt_ops.restore_fl(flags);
+}
+EXPORT_SYMBOL(paravirt_restore_flags);
+
+void paravirt_irq_disable(void)
+{
+	paravirt_ops.irq_disable();
+}
+EXPORT_SYMBOL(paravirt_irq_disable);
+
+void paravirt_irq_enable(void)
+{
+	paravirt_ops.irq_enable();
+}
+EXPORT_SYMBOL(paravirt_irq_enable);
+
+void paravirt_io_delay(void)
+{
+	paravirt_ops.io_delay();
+}
+EXPORT_SYMBOL(paravirt_io_delay);
+
+void paravirt_const_udelay(unsigned long loops)
+{
+	paravirt_ops.const_udelay(loops);
+}
+EXPORT_SYMBOL(paravirt_const_udelay);
+
+u64 paravirt_read_msr(unsigned int msr, int *err)
+{
+	return paravirt_ops.read_msr(msr, err);
+}
+EXPORT_SYMBOL(paravirt_read_msr);
+
+int paravirt_write_msr(unsigned int msr, u64 val)
+{
+	return paravirt_ops.write_msr(msr, val);
+}
+EXPORT_SYMBOL(paravirt_write_msr);
+
+u64 paravirt_read_tsc(void)
+{
+	return paravirt_ops.read_tsc();
+}
+EXPORT_SYMBOL(paravirt_read_tsc);
+
+int paravirt_enabled(void)
+{
+	return paravirt_ops.paravirt_enabled;
+}
+EXPORT_SYMBOL(paravirt_enabled);
+
+void clts(void)
+{
+	paravirt_ops.clts();
+}
+EXPORT_SYMBOL(clts);
+
+unsigned long read_cr0(void)
+{
+	return paravirt_ops.read_cr0();
+}
+EXPORT_SYMBOL(read_cr0);
+
+void write_cr0(unsigned long cr0)
+{
+	paravirt_ops.write_cr0(cr0);
+}
+EXPORT_SYMBOL(write_cr0);
+
+void wbinvd(void)
+{
+	paravirt_ops.wbinvd();
+}
+EXPORT_SYMBOL(wbinvd);
+
+void raw_safe_halt(void)
+{
+	paravirt_ops.safe_halt();
+}
+EXPORT_SYMBOL(raw_safe_halt);
+
+void halt(void)
+{
+	paravirt_ops.safe_halt();
+}
+EXPORT_SYMBOL(halt);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+void apic_write(unsigned long reg, unsigned long v)
+{
+	paravirt_ops.apic_write(reg,v);
+}
+EXPORT_SYMBOL_GPL(apic_write);
+
+unsigned long apic_read(unsigned long reg)
+{
+	return paravirt_ops.apic_read(reg);
+}
+EXPORT_SYMBOL_GPL(apic_read);
+#endif
+
+void __cpuid(unsigned int *eax, unsigned int *ebx,
+	     unsigned int *ecx, unsigned int *edx)
+{
+	paravirt_ops.cpuid(eax, ebx, ecx, edx);
+}
+EXPORT_SYMBOL(__cpuid);
+
+#ifdef CONFIG_X86_PAE
+unsigned long long pgd_val(pgd_t x)
+{
+	return paravirt_ops.pgd_val(x);
+}
+unsigned long long pte_val(pte_t x)
+{
+	return paravirt_ops.pte_val(x);
+}
+pte_t __pte(unsigned long long x)
+{
+	return paravirt_ops.make_pte(x);
+}
+#else
+unsigned long pgd_val(pgd_t x)
+{
+	return paravirt_ops.pgd_val(x);
+}
+unsigned long pte_val(pte_t x)
+{
+	return paravirt_ops.pte_val(x);
+}
+pte_t __pte(unsigned long x)
+{
+	return paravirt_ops.make_pte(x);
+}
+#endif
+EXPORT_SYMBOL_GPL(pgd_val);
+EXPORT_SYMBOL_GPL(pte_val);
+EXPORT_SYMBOL_GPL(__pte);
 
 /* We simply declare start_kernel to be the paravirt probe of last resort. */
 paravirt_probe(start_kernel);
@@ -710,4 +858,3 @@ struct paravirt_ops paravirt_ops = {
 
 	.startup_ipi_hook = (void *)native_nop,
 };
-EXPORT_SYMBOL(paravirt_ops);
diff -r 48f31ae5d7b5 include/asm-i386/delay.h
--- a/include/asm-i386/delay.h	Sat Jan 06 10:32:24 2007 +1100
+++ b/include/asm-i386/delay.h	Sat Jan 06 11:23:25 2007 +1100
@@ -17,9 +17,9 @@ extern void __delay(unsigned long loops)
 extern void __delay(unsigned long loops);
 
 #if defined(CONFIG_PARAVIRT) && !defined(USE_REAL_TIME_DELAY)
-#define udelay(n) paravirt_ops.const_udelay((n) * 0x10c7ul)
+#define udelay(n) paravirt_const_udelay((n) * 0x10c7ul)
 
-#define ndelay(n) paravirt_ops.const_udelay((n) * 5ul)
+#define ndelay(n) paravirt_const_udelay((n) * 5ul)
 
 #else /* !PARAVIRT || USE_REAL_TIME_DELAY */
 
diff -r 48f31ae5d7b5 include/asm-i386/paravirt.h
--- a/include/asm-i386/paravirt.h	Sat Jan 06 10:32:24 2007 +1100
+++ b/include/asm-i386/paravirt.h	Sat Jan 06 16:46:14 2007 +1100
@@ -198,6 +198,18 @@ extern struct paravirt_ops paravirt_ops;
 
 void native_pagetable_setup_start(pgd_t *pgd);
 
+/* These are the functions exported to modules. */
+unsigned long paravirt_save_flags(void);
+void paravirt_restore_flags(unsigned long flags);
+void paravirt_irq_disable(void);
+void paravirt_irq_enable(void);
+void paravirt_const_udelay(unsigned long loops);
+void paravirt_io_delay(void);
+u64 paravirt_read_msr(unsigned int msr, int *err);
+int paravirt_write_msr(unsigned int msr, u64 val);
+u64 paravirt_read_tsc(void);
+int paravirt_enabled(void);
+
 #ifdef CONFIG_X86_PAE
 fastcall unsigned long long native_pte_val(pte_t);
 fastcall unsigned long long native_pmd_val(pmd_t);
@@ -216,8 +228,6 @@ fastcall pgd_t native_make_pgd(unsigned 
 fastcall pgd_t native_make_pgd(unsigned long pgd);
 #endif
 
-#define paravirt_enabled() (paravirt_ops.paravirt_enabled)
-
 static inline void load_esp0(struct tss_struct *tss,
 			     struct thread_struct *thread)
 {
@@ -241,11 +251,8 @@ static inline void do_time_init(void)
 }
 
 /* The paravirtualized CPUID instruction. */
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
-			   unsigned int *ecx, unsigned int *edx)
-{
-	paravirt_ops.cpuid(eax, ebx, ecx, edx);
-}
+void __cpuid(unsigned int *eax, unsigned int *ebx,
+	     unsigned int *ecx, unsigned int *edx);
 
 /*
  * These special macros can be used to get or set a debugging register
@@ -253,10 +260,9 @@ static inline void __cpuid(unsigned int 
 #define get_debugreg(var, reg) var = paravirt_ops.get_debugreg(reg)
 #define set_debugreg(val, reg) paravirt_ops.set_debugreg(reg, val)
 
-#define clts() paravirt_ops.clts()
-
-#define read_cr0() paravirt_ops.read_cr0()
-#define write_cr0(x) paravirt_ops.write_cr0(x)
+void clts(void);
+unsigned long read_cr0(void);
+void write_cr0(unsigned long);
 
 #define read_cr2() paravirt_ops.read_cr2()
 #define write_cr2(x) paravirt_ops.write_cr2(x)
@@ -270,62 +276,55 @@ static inline void __cpuid(unsigned int 
 
 #define raw_ptep_get_and_clear(xp)	(paravirt_ops.ptep_get_and_clear(xp))
 
-static inline void raw_safe_halt(void)
-{
-	paravirt_ops.safe_halt();
-}
-
-static inline void halt(void)
-{
-	paravirt_ops.safe_halt();
-}
-#define wbinvd() paravirt_ops.wbinvd()
+void raw_safe_halt(void);
+void halt(void);
+void wbinvd(void);
 
 #define get_kernel_rpl()  (paravirt_ops.kernel_rpl)
 
 #define rdmsr(msr,val1,val2) do {				\
 	int _err;						\
-	u64 _l = paravirt_ops.read_msr(msr,&_err);		\
+	u64 _l = paravirt_read_msr(msr,&_err);			\
 	val1 = (u32)_l;						\
 	val2 = _l >> 32;					\
 } while(0)
 
 #define wrmsr(msr,val1,val2) do {				\
 	u64 _l = ((u64)(val2) << 32) | (val1);			\
-	paravirt_ops.write_msr((msr), _l);			\
+	paravirt_write_msr((msr), _l);				\
 } while(0)
 
 #define rdmsrl(msr,val) do {					\
 	int _err;						\
-	val = paravirt_ops.read_msr((msr),&_err);		\
+	val = paravirt_read_msr((msr),&_err);			\
 } while(0)
 
-#define wrmsrl(msr,val) (paravirt_ops.write_msr((msr),(val)))
+#define wrmsrl(msr,val) (paravirt_write_msr((msr),(val)))
 #define wrmsr_safe(msr,a,b) ({					\
 	u64 _l = ((u64)(b) << 32) | (a);			\
-	paravirt_ops.write_msr((msr),_l);			\
+	paravirt_write_msr((msr),_l);				\
 })
 
 /* rdmsr with exception handling */
 #define rdmsr_safe(msr,a,b) ({					\
 	int _err;						\
-	u64 _l = paravirt_ops.read_msr(msr,&_err);		\
+	u64 _l = paravirt_read_msr(msr,&_err);			\
 	(*a) = (u32)_l;						\
 	(*b) = _l >> 32;					\
 	_err; })
 
 #define rdtsc(low,high) do {					\
-	u64 _l = paravirt_ops.read_tsc();			\
+	u64 _l = paravirt_read_tsc();				\
 	low = (u32)_l;						\
 	high = _l >> 32;					\
 } while(0)
 
 #define rdtscl(low) do {					\
-	u64 _l = paravirt_ops.read_tsc();			\
+	u64 _l = paravirt_read_tsc();				\
 	low = (int)_l;						\
 } while(0)
 
-#define rdtscll(val) (val = paravirt_ops.read_tsc())
+#define rdtscll(val) (val = paravirt_read_tsc())
 
 #define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
 
@@ -351,11 +350,18 @@ static inline void halt(void)
 	(paravirt_ops.write_idt_entry((dt), (entry), (low), (high)))
 #define set_iopl_mask(mask) (paravirt_ops.set_iopl_mask(mask))
 
-#define __pte(x)	paravirt_ops.make_pte(x)
+/* FIXME: drivers/char/drm/drm_memory.h wants access to these. */
+#ifdef CONFIG_X86_PAE
+unsigned long long pgd_val(pgd_t x);
+unsigned long long pte_val(pte_t x);
+pte_t __pte(unsigned long long x);
+#else
+unsigned long pgd_val(pgd_t x);
+unsigned long pte_val(pte_t x);
+pte_t __pte(unsigned long x);
+#endif
+
 #define __pgd(x)	paravirt_ops.make_pgd(x)
-
-#define pte_val(x)	paravirt_ops.pte_val(x)
-#define pgd_val(x)	paravirt_ops.pgd_val(x)
 
 #ifdef CONFIG_X86_PAE
 #define __pmd(x)	paravirt_ops.make_pmd(x)
@@ -363,12 +369,13 @@ static inline void halt(void)
 #endif
 
 /* The paravirtualized I/O functions */
+
 static inline void slow_down_io(void) {
-	paravirt_ops.io_delay();
+	paravirt_io_delay();
 #ifdef REALLY_SLOW_IO
-	paravirt_ops.io_delay();
-	paravirt_ops.io_delay();
-	paravirt_ops.io_delay();
+	paravirt_io_delay();
+	paravirt_io_delay();
+	paravirt_io_delay();
 #endif
 }
 
@@ -376,19 +383,12 @@ static inline void slow_down_io(void) {
 /*
  * Basic functions accessing APICs.
  */
-static inline void apic_write(unsigned long reg, unsigned long v)
-{
-	paravirt_ops.apic_write(reg,v);
-}
+void apic_write(unsigned long reg, unsigned long v);
+unsigned long apic_read(unsigned long reg);
 
 static inline void apic_write_atomic(unsigned long reg, unsigned long v)
 {
 	paravirt_ops.apic_write_atomic(reg,v);
-}
-
-static inline unsigned long apic_read(unsigned long reg)
-{
-	return paravirt_ops.apic_read(reg);
 }
 #endif
 
@@ -532,42 +532,38 @@ static inline unsigned long __raw_local_
 	unsigned long f;
 
 	__asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
-					   "call *%1;"
+					   "call paravirt_save_flags;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_SAVE_FLAGS, CLBR_NONE)
-			     : "=a"(f): "m"(paravirt_ops.save_fl)
-			     : "memory", "cc");
+			     : "=a"(f) : : "memory", "cc");
 	return f;
 }
 
 static inline void raw_local_irq_restore(unsigned long f)
 {
 	__asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
-					   "call *%1;"
+					   "call paravirt_restore_flags;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_RESTORE_FLAGS, CLBR_EAX)
-			     : "=a"(f) : "m" (paravirt_ops.restore_fl), "0"(f)
-			     : "memory", "cc");
+			     : "=a"(f) : "0"(f) : "memory", "cc");
 }
 
 static inline void raw_local_irq_disable(void)
 {
 	__asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
-					   "call *%0;"
+					   "call paravirt_irq_disable;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_IRQ_DISABLE, CLBR_EAX)
-			     : : "m" (paravirt_ops.irq_disable)
-			     : "memory", "eax", "cc");
+			     : : : "memory", "eax", "cc");
 }
 
 static inline void raw_local_irq_enable(void)
 {
 	__asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
-					   "call *%0;"
+					   "call paravirt_irq_enable;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_IRQ_ENABLE, CLBR_EAX)
-			     : : "m" (paravirt_ops.irq_enable)
-			     : "memory", "eax", "cc");
+			     : : : "memory", "eax", "cc");
 }
 
 static inline unsigned long __raw_local_irq_save(void)
@@ -575,15 +571,13 @@ static inline unsigned long __raw_local_
 	unsigned long f;
 
 	__asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
-					   "call *%1; pushl %%eax;"
-					   "call *%2; popl %%eax;"
-					   "popl %%edx; popl %%ecx",
+					   "call paravirt_save_flags;"
+					   "pushl %%eax;"
+					   "call paravirt_irq_disable;"
+					   "popl %%eax;popl %%edx; popl %%ecx",
 					  PARAVIRT_SAVE_FLAGS_IRQ_DISABLE,
 					  CLBR_NONE)
-			     : "=a"(f)
-			     : "m" (paravirt_ops.save_fl),
-			       "m" (paravirt_ops.irq_disable)
-			     : "memory", "cc");
+			     : "=a"(f) : : "memory", "cc");
 	return f;
 }
 
diff -r 48f31ae5d7b5 include/linux/irqflags.h
--- a/include/linux/irqflags.h	Sat Jan 06 10:32:24 2007 +1100
+++ b/include/linux/irqflags.h	Sat Jan 06 15:03:42 2007 +1100
@@ -74,11 +74,11 @@
 #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
 
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
-#define safe_halt()						\
-	do {							\
-		trace_hardirqs_on();				\
-		raw_safe_halt();				\
-	} while (0)
+static inline void safe_halt(void)
+{
+	trace_hardirqs_on();
+	raw_safe_halt();
+}
 
 #define local_save_flags(flags)		raw_local_save_flags(flags)
 



^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06  6:25   ` Rusty Russell
@ 2007-01-06  7:08     ` Ingo Molnar
  2007-01-06  7:12       ` Ingo Molnar
  2007-01-06 17:42       ` Rusty Russell
  2007-01-06  7:14     ` Ingo Molnar
  1 sibling, 2 replies; 23+ messages in thread
From: Ingo Molnar @ 2007-01-06  7:08 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Zachary Amsden, Jeremy Fitzhardinge, Chris Wright, Andrew Morton,
	linux-kernel, Arjan van de Ven, Adrian Bunk


* Rusty Russell <rusty@rustcorp.com.au> wrote:

> diff -r 48f31ae5d7b5 arch/i386/kernel/paravirt.c
> --- a/arch/i386/kernel/paravirt.c	Sat Jan 06 10:32:24 2007 +1100
> +++ b/arch/i386/kernel/paravirt.c	Sat Jan 06 17:23:12 2007 +1100
> @@ -596,6 +596,154 @@ static int __init print_banner(void)
>  	return 0;
>  }
>  core_initcall(print_banner);
> +
> +unsigned long paravirt_save_flags(void)
> +{
> +	return paravirt_ops.save_fl();
> +}
> +EXPORT_SYMBOL(paravirt_save_flags);

ok, i like this one too - i agree that it's better than mine because it 
isolates on a per-API level not on a per-lowlevel-paravirt-op level. But 
this doesnt do the most crutial step: the removal of the paravirt_ops 
export. Without that the module build test is pointless.

btw., your patch does not apply to current -git - could you please 
rebase this patch to the head of your queue so that upstream can pick it 
up?

	Ingo

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06  7:08     ` Ingo Molnar
@ 2007-01-06  7:12       ` Ingo Molnar
  2007-01-06 17:42       ` Rusty Russell
  1 sibling, 0 replies; 23+ messages in thread
From: Ingo Molnar @ 2007-01-06  7:12 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Zachary Amsden, Jeremy Fitzhardinge, Chris Wright, Andrew Morton,
	linux-kernel, Arjan van de Ven, Adrian Bunk


* Ingo Molnar <mingo@elte.hu> wrote:

> this doesnt do the most crutial step: the removal of the paravirt_ops 
> export. [...]

ah, you removed it already ... it hid at the very last line of the patch 
chunk. Good :)

	Ingo

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06  6:25   ` Rusty Russell
  2007-01-06  7:08     ` Ingo Molnar
@ 2007-01-06  7:14     ` Ingo Molnar
  2007-01-06  9:50       ` Zachary Amsden
  2007-01-06 16:18       ` Rusty Russell
  1 sibling, 2 replies; 23+ messages in thread
From: Ingo Molnar @ 2007-01-06  7:14 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Zachary Amsden, Jeremy Fitzhardinge, Chris Wright, Andrew Morton,
	linux-kernel, Arjan van de Ven, Adrian Bunk


* Rusty Russell <rusty@rustcorp.com.au> wrote:

> +EXPORT_SYMBOL(clts);
> +EXPORT_SYMBOL(read_cr0);
> +EXPORT_SYMBOL(write_cr0);

mark these a _GPL export. Perhaps even mark the symbol deprecated, to be 
unexported once we fix raid6.

> +EXPORT_SYMBOL(wbinvd);
> +EXPORT_SYMBOL(raw_safe_halt);
> +EXPORT_SYMBOL(halt);
> +EXPORT_SYMBOL_GPL(apic_write);
> +EXPORT_SYMBOL_GPL(apic_read);

these should be _GPL too. If any module uses it and breaks a user's box 
we need that big licensing hint to be able to debug them ...

	Ingo

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06  7:14     ` Ingo Molnar
@ 2007-01-06  9:50       ` Zachary Amsden
  2007-01-06 10:52         ` Arjan van de Ven
  2007-01-06 16:18       ` Rusty Russell
  1 sibling, 1 reply; 23+ messages in thread
From: Zachary Amsden @ 2007-01-06  9:50 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rusty Russell, Jeremy Fitzhardinge, Chris Wright, Andrew Morton,
	linux-kernel, Arjan van de Ven, Adrian Bunk

Ingo Molnar wrote:
> * Rusty Russell <rusty@rustcorp.com.au> wrote:
>
>   
>> +EXPORT_SYMBOL(clts);
>> +EXPORT_SYMBOL(read_cr0);
>> +EXPORT_SYMBOL(write_cr0);
>>     
>
> mark these a _GPL export. Perhaps even mark the symbol deprecated, to be 
> unexported once we fix raid6.
>   

read / write cr0 must not be GPL, since kernel_fpu_end uses them and is 
inline.  clts I don't think matters.


>> +EXPORT_SYMBOL(wbinvd);
>> +EXPORT_SYMBOL(raw_safe_halt);
>> +EXPORT_SYMBOL(halt);
>> +EXPORT_SYMBOL_GPL(apic_write);
>> +EXPORT_SYMBOL_GPL(apic_read);
>>     
>
> these should be _GPL too. If any module uses it and breaks a user's box 
> we need that big licensing hint to be able to debug them ...
>   

Perhaps also, MSRs are too dangerous for binary modules to be messing with.

Agree on halt - but wbinvd can theoretically be used for device mapped 
memory consistency.

Zach

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06  9:50       ` Zachary Amsden
@ 2007-01-06 10:52         ` Arjan van de Ven
  0 siblings, 0 replies; 23+ messages in thread
From: Arjan van de Ven @ 2007-01-06 10:52 UTC (permalink / raw)
  To: Zachary Amsden
  Cc: Ingo Molnar, Rusty Russell, Jeremy Fitzhardinge, Chris Wright,
	Andrew Morton, linux-kernel, Adrian Bunk

On Sat, 2007-01-06 at 01:50 -0800, Zachary Amsden wrote:
> Ingo Molnar wrote:
> > * Rusty Russell <rusty@rustcorp.com.au> wrote:
> >
> >   
> >> +EXPORT_SYMBOL(clts);
> >> +EXPORT_SYMBOL(read_cr0);
> >> +EXPORT_SYMBOL(write_cr0);
> >>     
> >
> > mark these a _GPL export. Perhaps even mark the symbol deprecated, to be 
> > unexported once we fix raid6.
> >   
> 
> read / write cr0 must not be GPL, since kernel_fpu_end uses them

but kernel_fpu_begin() is _GPL already so this is just symmetry...

-- 
if you want to mail me at work (you don't), use arjan (at) linux.intel.com
Test the interaction between Linux and your BIOS via http://www.linuxfirmwarekit.org


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06  7:14     ` Ingo Molnar
  2007-01-06  9:50       ` Zachary Amsden
@ 2007-01-06 16:18       ` Rusty Russell
  2007-01-06 19:31         ` Christoph Hellwig
  1 sibling, 1 reply; 23+ messages in thread
From: Rusty Russell @ 2007-01-06 16:18 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Zachary Amsden, Jeremy Fitzhardinge, Chris Wright, Andrew Morton,
	linux-kernel, Arjan van de Ven, Adrian Bunk

On Sat, 2007-01-06 at 08:14 +0100, Ingo Molnar wrote:
> * Rusty Russell <rusty@rustcorp.com.au> wrote:
> 
> > +EXPORT_SYMBOL(clts);
> > +EXPORT_SYMBOL(read_cr0);
> > +EXPORT_SYMBOL(write_cr0);
> 
> mark these a _GPL export. Perhaps even mark the symbol deprecated, to be 
> unexported once we fix raid6.

OK...

> > +EXPORT_SYMBOL(wbinvd);
> > +EXPORT_SYMBOL(raw_safe_halt);
> > +EXPORT_SYMBOL(halt);
> > +EXPORT_SYMBOL_GPL(apic_write);
> > +EXPORT_SYMBOL_GPL(apic_read);
> 
> these should be _GPL too. If any module uses it and breaks a user's box 
> we need that big licensing hint to be able to debug them ...

OK.  I GPLed the ones which I thought were really obscure, but I was
trying to follow existing policy of not _GPL'ing existing symbols.

Cheers,
Rusty.

PS.  drm_memory.h has a "drm_follow_page": this forces us to uninline
various page tables ops.  Can this use follow_page() somehow, or do we
need an "__follow_page" export for this case?





^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06  7:08     ` Ingo Molnar
  2007-01-06  7:12       ` Ingo Molnar
@ 2007-01-06 17:42       ` Rusty Russell
  2007-01-06 18:32         ` Ingo Molnar
  2007-01-06 20:55         ` Zachary Amsden
  1 sibling, 2 replies; 23+ messages in thread
From: Rusty Russell @ 2007-01-06 17:42 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Zachary Amsden, Jeremy Fitzhardinge, Chris Wright, Andrew Morton,
	linux-kernel, Arjan van de Ven, Adrian Bunk

On Sat, 2007-01-06 at 08:08 +0100, Ingo Molnar wrote:
> btw., your patch does not apply to current -git - could you please 
> rebase this patch to the head of your queue so that upstream can pick it 
> up?

OK, here it is against rc3-git4.

Name: don't export paravirt_ops structure, do individual functions

Wrap the paravirt_ops members we want to export in wrapper functions.
Since we binary-patch the critical ones, this doesn't make a speed
impact.

I moved drm_follow_page into the core, to avoid having to wrap the
various pte ops.  Unlining kernel_fpu_end and using that in the RAID6
code would remove the need to export clts/read_cr0/write_cr0 too.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/dontdiff --minimal linux-2.6.20-rc3-git4/arch/i386/kernel/paravirt.c working-2.6.20-rc3-git4/arch/i386/kernel/paravirt.c
--- linux-2.6.20-rc3-git4/arch/i386/kernel/paravirt.c	2007-01-07 03:41:32.000000000 +1100
+++ working-2.6.20-rc3-git4/arch/i386/kernel/paravirt.c	2007-01-07 04:21:59.000000000 +1100
@@ -482,6 +482,123 @@ static int __init print_banner(void)
 }
 core_initcall(print_banner);
 
+unsigned long paravirt_save_flags(void)
+{
+	return paravirt_ops.save_fl();
+}
+EXPORT_SYMBOL(paravirt_save_flags);
+
+void paravirt_restore_flags(unsigned long flags)
+{
+	paravirt_ops.restore_fl(flags);
+}
+EXPORT_SYMBOL(paravirt_restore_flags);
+
+void paravirt_irq_disable(void)
+{
+	paravirt_ops.irq_disable();
+}
+EXPORT_SYMBOL(paravirt_irq_disable);
+
+void paravirt_irq_enable(void)
+{
+	paravirt_ops.irq_enable();
+}
+EXPORT_SYMBOL(paravirt_irq_enable);
+
+void paravirt_io_delay(void)
+{
+	paravirt_ops.io_delay();
+}
+EXPORT_SYMBOL(paravirt_io_delay);
+
+void paravirt_const_udelay(unsigned long loops)
+{
+	paravirt_ops.const_udelay(loops);
+}
+EXPORT_SYMBOL(paravirt_const_udelay);
+
+u64 paravirt_read_msr(unsigned int msr, int *err)
+{
+	return paravirt_ops.read_msr(msr, err);
+}
+EXPORT_SYMBOL(paravirt_read_msr);
+
+int paravirt_write_msr(unsigned int msr, u64 val)
+{
+	return paravirt_ops.write_msr(msr, val);
+}
+EXPORT_SYMBOL(paravirt_write_msr);
+
+u64 paravirt_read_tsc(void)
+{
+	return paravirt_ops.read_tsc();
+}
+EXPORT_SYMBOL(paravirt_read_tsc);
+
+int paravirt_enabled(void)
+{
+	return paravirt_ops.paravirt_enabled;
+}
+EXPORT_SYMBOL(paravirt_enabled);
+
+void clts(void)
+{
+	paravirt_ops.clts();
+}
+EXPORT_SYMBOL(clts);
+
+unsigned long read_cr0(void)
+{
+	return paravirt_ops.read_cr0();
+}
+EXPORT_SYMBOL_GPL(read_cr0);
+
+void write_cr0(unsigned long cr0)
+{
+	paravirt_ops.write_cr0(cr0);
+}
+EXPORT_SYMBOL_GPL(write_cr0);
+
+void wbinvd(void)
+{
+	paravirt_ops.wbinvd();
+}
+EXPORT_SYMBOL(wbinvd);
+
+void raw_safe_halt(void)
+{
+	paravirt_ops.safe_halt();
+}
+EXPORT_SYMBOL_GPL(raw_safe_halt);
+
+void halt(void)
+{
+	paravirt_ops.safe_halt();
+}
+EXPORT_SYMBOL_GPL(halt);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+void apic_write(unsigned long reg, unsigned long v)
+{
+	paravirt_ops.apic_write(reg,v);
+}
+EXPORT_SYMBOL_GPL(apic_write);
+
+unsigned long apic_read(unsigned long reg)
+{
+	return paravirt_ops.apic_read(reg);
+}
+EXPORT_SYMBOL_GPL(apic_read);
+#endif
+
+void __cpuid(unsigned int *eax, unsigned int *ebx,
+	     unsigned int *ecx, unsigned int *edx)
+{
+	paravirt_ops.cpuid(eax, ebx, ecx, edx);
+}
+EXPORT_SYMBOL(__cpuid);
+
 /* We simply declare start_kernel to be the paravirt probe of last resort. */
 paravirt_probe(start_kernel);
 
@@ -566,4 +683,3 @@ struct paravirt_ops paravirt_ops = {
 	.irq_enable_sysexit = native_irq_enable_sysexit,
 	.iret = native_iret,
 };
-EXPORT_SYMBOL(paravirt_ops);
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/dontdiff --minimal linux-2.6.20-rc3-git4/drivers/char/drm/drm_memory.h working-2.6.20-rc3-git4/drivers/char/drm/drm_memory.h
--- linux-2.6.20-rc3-git4/drivers/char/drm/drm_memory.h	2006-09-22 15:36:13.000000000 +1000
+++ working-2.6.20-rc3-git4/drivers/char/drm/drm_memory.h	2007-01-07 04:19:07.000000000 +1100
@@ -58,11 +58,7 @@
 
 static inline unsigned long drm_follow_page(void *vaddr)
 {
-	pgd_t *pgd = pgd_offset_k((unsigned long)vaddr);
-	pud_t *pud = pud_offset(pgd, (unsigned long)vaddr);
-	pmd_t *pmd = pmd_offset(pud, (unsigned long)vaddr);
-	pte_t *ptep = pte_offset_kernel(pmd, (unsigned long)vaddr);
-	return pte_pfn(*ptep) << PAGE_SHIFT;
+	return __follow_page(vaddr);
 }
 
 #else				/* __OS_HAS_AGP */
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/dontdiff --minimal linux-2.6.20-rc3-git4/include/asm-i386/delay.h working-2.6.20-rc3-git4/include/asm-i386/delay.h
--- linux-2.6.20-rc3-git4/include/asm-i386/delay.h	2007-01-07 03:42:32.000000000 +1100
+++ working-2.6.20-rc3-git4/include/asm-i386/delay.h	2007-01-07 04:08:46.000000000 +1100
@@ -17,9 +17,9 @@ extern void __const_udelay(unsigned long
 extern void __delay(unsigned long loops);
 
 #if defined(CONFIG_PARAVIRT) && !defined(USE_REAL_TIME_DELAY)
-#define udelay(n) paravirt_ops.const_udelay((n) * 0x10c7ul)
+#define udelay(n) paravirt_const_udelay((n) * 0x10c7ul)
 
-#define ndelay(n) paravirt_ops.const_udelay((n) * 5ul)
+#define ndelay(n) paravirt_const_udelay((n) * 5ul)
 
 #else /* !PARAVIRT || USE_REAL_TIME_DELAY */
 
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/dontdiff --minimal linux-2.6.20-rc3-git4/include/asm-i386/paravirt.h working-2.6.20-rc3-git4/include/asm-i386/paravirt.h
--- linux-2.6.20-rc3-git4/include/asm-i386/paravirt.h	2007-01-07 03:42:33.000000000 +1100
+++ working-2.6.20-rc3-git4/include/asm-i386/paravirt.h	2007-01-07 04:13:44.000000000 +1100
@@ -152,8 +152,6 @@ struct paravirt_ops
 
 extern struct paravirt_ops paravirt_ops;
 
-#define paravirt_enabled() (paravirt_ops.paravirt_enabled)
-
 static inline void load_esp0(struct tss_struct *tss,
 			     struct thread_struct *thread)
 {
@@ -177,11 +175,8 @@ static inline void do_time_init(void)
 }
 
 /* The paravirtualized CPUID instruction. */
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
-			   unsigned int *ecx, unsigned int *edx)
-{
-	paravirt_ops.cpuid(eax, ebx, ecx, edx);
-}
+void __cpuid(unsigned int *eax, unsigned int *ebx,
+	     unsigned int *ecx, unsigned int *edx);
 
 /*
  * These special macros can be used to get or set a debugging register
@@ -189,11 +184,6 @@ static inline void __cpuid(unsigned int 
 #define get_debugreg(var, reg) var = paravirt_ops.get_debugreg(reg)
 #define set_debugreg(val, reg) paravirt_ops.set_debugreg(reg, val)
 
-#define clts() paravirt_ops.clts()
-
-#define read_cr0() paravirt_ops.read_cr0()
-#define write_cr0(x) paravirt_ops.write_cr0(x)
-
 #define read_cr2() paravirt_ops.read_cr2()
 #define write_cr2(x) paravirt_ops.write_cr2(x)
 
@@ -204,62 +194,51 @@ static inline void __cpuid(unsigned int 
 #define read_cr4_safe(x) paravirt_ops.read_cr4_safe()
 #define write_cr4(x) paravirt_ops.write_cr4(x)
 
-static inline void raw_safe_halt(void)
-{
-	paravirt_ops.safe_halt();
-}
-
-static inline void halt(void)
-{
-	paravirt_ops.safe_halt();
-}
-#define wbinvd() paravirt_ops.wbinvd()
-
 #define get_kernel_rpl()  (paravirt_ops.kernel_rpl)
 
 #define rdmsr(msr,val1,val2) do {				\
 	int _err;						\
-	u64 _l = paravirt_ops.read_msr(msr,&_err);		\
+	u64 _l = paravirt_read_msr(msr,&_err);			\
 	val1 = (u32)_l;						\
 	val2 = _l >> 32;					\
 } while(0)
 
 #define wrmsr(msr,val1,val2) do {				\
 	u64 _l = ((u64)(val2) << 32) | (val1);			\
-	paravirt_ops.write_msr((msr), _l);			\
+	paravirt_write_msr((msr), _l);				\
 } while(0)
 
 #define rdmsrl(msr,val) do {					\
 	int _err;						\
-	val = paravirt_ops.read_msr((msr),&_err);		\
+	val = paravirt_read_msr((msr),&_err);			\
 } while(0)
 
-#define wrmsrl(msr,val) (paravirt_ops.write_msr((msr),(val)))
+#define wrmsrl(msr,val) (paravirt_write_msr((msr),(val)))
 #define wrmsr_safe(msr,a,b) ({					\
 	u64 _l = ((u64)(b) << 32) | (a);			\
-	paravirt_ops.write_msr((msr),_l);			\
+	paravirt_write_msr((msr),_l);				\
 })
 
 /* rdmsr with exception handling */
 #define rdmsr_safe(msr,a,b) ({					\
 	int _err;						\
-	u64 _l = paravirt_ops.read_msr(msr,&_err);		\
+	u64 _l = paravirt_read_msr(msr,&_err);			\
 	(*a) = (u32)_l;						\
 	(*b) = _l >> 32;					\
 	_err; })
 
 #define rdtsc(low,high) do {					\
-	u64 _l = paravirt_ops.read_tsc();			\
+	u64 _l = paravirt_read_tsc();				\
 	low = (u32)_l;						\
 	high = _l >> 32;					\
 } while(0)
 
 #define rdtscl(low) do {					\
-	u64 _l = paravirt_ops.read_tsc();			\
+	u64 _l = paravirt_read_tsc();				\
 	low = (int)_l;						\
 } while(0)
 
-#define rdtscll(val) (val = paravirt_ops.read_tsc())
+#define rdtscll(val) (val = paravirt_read_tsc())
 
 #define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
 
@@ -345,6 +324,26 @@ static inline void pte_update_defer(stru
 	paravirt_ops.pte_update_defer(mm, addr, ptep);
 }
 
+/* These are the functions exported to modules. */
+int paravirt_enabled(void);
+unsigned long paravirt_save_flags(void);
+void paravirt_restore_flags(unsigned long flags);
+void paravirt_irq_disable(void);
+void paravirt_irq_enable(void);
+void paravirt_const_udelay(unsigned long loops);
+void paravirt_io_delay(void);
+u64 paravirt_read_msr(unsigned int msr, int *err);
+int paravirt_write_msr(unsigned int msr, u64 val);
+u64 paravirt_read_tsc(void);
+void raw_safe_halt(void);
+void halt(void);
+void wbinvd(void);
+
+/* These will be unexported once raid6 is fixed... */
+void clts(void);
+unsigned long read_cr0(void);
+void write_cr0(unsigned long);
+
 #ifdef CONFIG_X86_PAE
 static inline void set_pte_atomic(pte_t *ptep, pte_t pteval)
 {
@@ -394,42 +393,38 @@ static inline unsigned long __raw_local_
 	unsigned long f;
 
 	__asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
-					   "call *%1;"
+					   "call paravirt_save_flags;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_SAVE_FLAGS, CLBR_NONE)
-			     : "=a"(f): "m"(paravirt_ops.save_fl)
-			     : "memory", "cc");
+			     : "=a"(f) : : "memory", "cc");
 	return f;
 }
 
 static inline void raw_local_irq_restore(unsigned long f)
 {
 	__asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
-					   "call *%1;"
+					   "call paravirt_restore_flags;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_RESTORE_FLAGS, CLBR_EAX)
-			     : "=a"(f) : "m" (paravirt_ops.restore_fl), "0"(f)
-			     : "memory", "cc");
+			     : "=a"(f) : "0"(f) : "memory", "cc");
 }
 
 static inline void raw_local_irq_disable(void)
 {
 	__asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
-					   "call *%0;"
+					   "call paravirt_irq_disable;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_IRQ_DISABLE, CLBR_EAX)
-			     : : "m" (paravirt_ops.irq_disable)
-			     : "memory", "eax", "cc");
+			     : : : "memory", "eax", "cc");
 }
 
 static inline void raw_local_irq_enable(void)
 {
 	__asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
-					   "call *%0;"
+					   "call paravirt_irq_enable;"
 					   "popl %%edx; popl %%ecx",
 					  PARAVIRT_IRQ_ENABLE, CLBR_EAX)
-			     : : "m" (paravirt_ops.irq_enable)
-			     : "memory", "eax", "cc");
+			     : : : "memory", "eax", "cc");
 }
 
 static inline unsigned long __raw_local_irq_save(void)
@@ -437,15 +432,13 @@ static inline unsigned long __raw_local_
 	unsigned long f;
 
 	__asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
-					   "call *%1; pushl %%eax;"
-					   "call *%2; popl %%eax;"
-					   "popl %%edx; popl %%ecx",
+					   "call paravirt_save_flags;"
+					   "pushl %%eax;"
+					   "call paravirt_irq_disable;"
+					   "popl %%eax;popl %%edx; popl %%ecx",
 					  PARAVIRT_SAVE_FLAGS_IRQ_DISABLE,
 					  CLBR_NONE)
-			     : "=a"(f)
-			     : "m" (paravirt_ops.save_fl),
-			       "m" (paravirt_ops.irq_disable)
-			     : "memory", "cc");
+			     : "=a"(f) : : "memory", "cc");
 	return f;
 }
 
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/dontdiff --minimal linux-2.6.20-rc3-git4/include/linux/irqflags.h working-2.6.20-rc3-git4/include/linux/irqflags.h
--- linux-2.6.20-rc3-git4/include/linux/irqflags.h	2006-09-22 15:37:14.000000000 +1000
+++ working-2.6.20-rc3-git4/include/linux/irqflags.h	2007-01-07 04:08:46.000000000 +1100
@@ -74,11 +74,11 @@
 #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
 
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
-#define safe_halt()						\
-	do {							\
-		trace_hardirqs_on();				\
-		raw_safe_halt();				\
-	} while (0)
+static inline void safe_halt(void)
+{
+	trace_hardirqs_on();
+	raw_safe_halt();
+}
 
 #define local_save_flags(flags)		raw_local_save_flags(flags)
 
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/dontdiff --minimal linux-2.6.20-rc3-git4/include/linux/mm.h working-2.6.20-rc3-git4/include/linux/mm.h
--- linux-2.6.20-rc3-git4/include/linux/mm.h	2007-01-07 03:42:43.000000000 +1100
+++ working-2.6.20-rc3-git4/include/linux/mm.h	2007-01-07 04:20:41.000000000 +1100
@@ -1127,6 +1127,8 @@ struct page *follow_page(struct vm_area_
 #define FOLL_GET	0x04	/* do get_page on page */
 #define FOLL_ANON	0x08	/* give ZERO_PAGE if no pgtable */
 
+unsigned long __follow_page(void *vaddr);
+
 #ifdef CONFIG_PROC_FS
 void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
 #else
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/dontdiff --minimal linux-2.6.20-rc3-git4/mm/memory.c working-2.6.20-rc3-git4/mm/memory.c
--- linux-2.6.20-rc3-git4/mm/memory.c	2007-01-07 03:42:49.000000000 +1100
+++ working-2.6.20-rc3-git4/mm/memory.c	2007-01-07 04:19:20.000000000 +1100
@@ -976,6 +976,17 @@ no_page_table:
 	return page;
 }
 
+/* You don't want to use this function.  It's for drm_memory.c. */
+unsigned long __follow_page(void *vaddr)
+{
+	pgd_t *pgd = pgd_offset_k((unsigned long)vaddr);
+	pud_t *pud = pud_offset(pgd, (unsigned long)vaddr);
+	pmd_t *pmd = pmd_offset(pud, (unsigned long)vaddr);
+	pte_t *ptep = pte_offset_kernel(pmd, (unsigned long)vaddr);
+	return pte_pfn(*ptep) << PAGE_SHIFT;
+}
+EXPORT_SYMBOL_GPL(__follow_page);
+
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, int len, int write, int force,
 		struct page **pages, struct vm_area_struct **vmas)



^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06 17:42       ` Rusty Russell
@ 2007-01-06 18:32         ` Ingo Molnar
  2007-01-06 20:55         ` Zachary Amsden
  1 sibling, 0 replies; 23+ messages in thread
From: Ingo Molnar @ 2007-01-06 18:32 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Zachary Amsden, Jeremy Fitzhardinge, Chris Wright, Andrew Morton,
	linux-kernel, Arjan van de Ven, Adrian Bunk


* Rusty Russell <rusty@rustcorp.com.au> wrote:

> I moved drm_follow_page into the core, to avoid having to wrap the 
> various pte ops.  Unlining kernel_fpu_end and using that in the RAID6 
> code would remove the need to export clts/read_cr0/write_cr0 too.

yeah, let's do that too.

	Ingo

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06 16:18       ` Rusty Russell
@ 2007-01-06 19:31         ` Christoph Hellwig
  2007-01-07  5:35           ` Rusty Russell
  2007-01-07 18:39           ` Christoph Hellwig
  0 siblings, 2 replies; 23+ messages in thread
From: Christoph Hellwig @ 2007-01-06 19:31 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Ingo Molnar, Zachary Amsden, Jeremy Fitzhardinge, Chris Wright,
	Andrew Morton, linux-kernel, Arjan van de Ven, Adrian Bunk,
	airlied

On Sun, Jan 07, 2007 at 03:18:45AM +1100, Rusty Russell wrote:
> PS.  drm_memory.h has a "drm_follow_page": this forces us to uninline
> various page tables ops.  Can this use follow_page() somehow, or do we
> need an "__follow_page" export for this case?

Not if avoidable.  And it seems avoidable as drm really should be using
vmalloc_to_page.  Untested patch below:


Index: linux-2.6/drivers/char/drm/drm_memory.c
===================================================================
--- linux-2.6.orig/drivers/char/drm/drm_memory.c	2007-01-06 20:21:07.000000000 +0100
+++ linux-2.6/drivers/char/drm/drm_memory.c	2007-01-06 20:29:03.000000000 +0100
@@ -211,6 +211,23 @@
 }
 #endif  /*  0  */
 
+static int is_agp_mapping(void *pt, unsigned long size, drm_device_t *dev)
+{
+	unsigned long addr = (unsigned long)pt;
+
+	if (addr >= VMALLOC_START && addr < VMALLOC_END) {
+		unsigned long phys;
+		drm_map_t *map;
+
+		phys = page_to_phys(vmalloc_to_page(pt)) + offset_in_page(pt);
+		map = drm_lookup_map(phys, size, dev);
+		if (map && map->type == _DRM_AGP)
+			return 1;
+	}
+
+	return 0;
+}
+
 void drm_ioremapfree(void *pt, unsigned long size,
 				   drm_device_t * dev)
 {
@@ -219,21 +236,11 @@
 	 * routines for handling mappings in the AGP space.  Hopefully this can be done in
 	 * a future revision of the interface...
 	 */
-	if (drm_core_has_AGP(dev) && dev->agp && dev->agp->cant_use_aperture
-	    && ((unsigned long)pt >= VMALLOC_START
-		&& (unsigned long)pt < VMALLOC_END)) {
-		unsigned long offset;
-		drm_map_t *map;
-
-		offset = drm_follow_page(pt) | ((unsigned long)pt & ~PAGE_MASK);
-		map = drm_lookup_map(offset, size, dev);
-		if (map && map->type == _DRM_AGP) {
-			vunmap(pt);
-			return;
-		}
-	}
-
-	iounmap(pt);
+	if (drm_core_has_AGP(dev) && dev->agp && dev->agp->cant_use_aperture &&
+	    is_agp_mapping(pt, size, dev))
+		vunmap(pt);
+	else
+		iounmap(pt);
 }
 EXPORT_SYMBOL(drm_ioremapfree);
 
Index: linux-2.6/drivers/char/drm/drm_memory.h
===================================================================
--- linux-2.6.orig/drivers/char/drm/drm_memory.h	2007-01-06 20:21:07.000000000 +0100
+++ linux-2.6/drivers/char/drm/drm_memory.h	2007-01-06 20:26:50.000000000 +0100
@@ -55,23 +55,6 @@
 #  define PAGE_AGP	PAGE_KERNEL
 # endif
 #endif
-
-static inline unsigned long drm_follow_page(void *vaddr)
-{
-	pgd_t *pgd = pgd_offset_k((unsigned long)vaddr);
-	pud_t *pud = pud_offset(pgd, (unsigned long)vaddr);
-	pmd_t *pmd = pmd_offset(pud, (unsigned long)vaddr);
-	pte_t *ptep = pte_offset_kernel(pmd, (unsigned long)vaddr);
-	return pte_pfn(*ptep) << PAGE_SHIFT;
-}
-
-#else				/* __OS_HAS_AGP */
-
-static inline unsigned long drm_follow_page(void *vaddr)
-{
-	return 0;
-}
-
 #endif
 
 void *drm_ioremap(unsigned long offset, unsigned long size,

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06 17:42       ` Rusty Russell
  2007-01-06 18:32         ` Ingo Molnar
@ 2007-01-06 20:55         ` Zachary Amsden
  2007-01-07  1:09           ` Rusty Russell
  1 sibling, 1 reply; 23+ messages in thread
From: Zachary Amsden @ 2007-01-06 20:55 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Ingo Molnar, Jeremy Fitzhardinge, Chris Wright, Andrew Morton,
	linux-kernel, Arjan van de Ven, Adrian Bunk

Rusty Russell wrote:
>
> +int paravirt_write_msr(unsigned int msr, u64 val);

If binary modules using debug registers makes us nervous, the 
reprogramming MSRs is also similarly bad.


> +void raw_safe_halt(void);
> +void halt(void);
>   

These shouldn't be done by modules, ever.  Only the scheduler should 
decide to halt.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06 20:55         ` Zachary Amsden
@ 2007-01-07  1:09           ` Rusty Russell
  2007-01-07 14:07             ` Zachary Amsden
  0 siblings, 1 reply; 23+ messages in thread
From: Rusty Russell @ 2007-01-07  1:09 UTC (permalink / raw)
  To: Zachary Amsden
  Cc: Ingo Molnar, Jeremy Fitzhardinge, Chris Wright, Andrew Morton,
	linux-kernel, Arjan van de Ven, Adrian Bunk

On Sat, 2007-01-06 at 12:55 -0800, Zachary Amsden wrote:
> Rusty Russell wrote:
> >
> > +int paravirt_write_msr(unsigned int msr, u64 val);
> 
> If binary modules using debug registers makes us nervous, the 
> reprogramming MSRs is also similarly bad.

Yes, but this is simply from experience.  Several modules wrote msrs
(you can take out the EXPORT_SYMBOL and find them quite quickly).

> > +void raw_safe_halt(void);
> > +void halt(void);
> 
> These shouldn't be done by modules, ever.  Only the scheduler should 
> decide to halt.

Several modules implement alternate halt loops.  I guess being in a
module for specific CPUs makes sense...

Cheers!
Rusty.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06 19:31         ` Christoph Hellwig
@ 2007-01-07  5:35           ` Rusty Russell
  2007-01-07 18:39           ` Christoph Hellwig
  1 sibling, 0 replies; 23+ messages in thread
From: Rusty Russell @ 2007-01-07  5:35 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Ingo Molnar, Zachary Amsden, Jeremy Fitzhardinge, Chris Wright,
	Andrew Morton, linux-kernel, Arjan van de Ven, Adrian Bunk,
	airlied

On Sat, 2007-01-06 at 19:31 +0000, Christoph Hellwig wrote:
> On Sun, Jan 07, 2007 at 03:18:45AM +1100, Rusty Russell wrote:
> > PS.  drm_memory.h has a "drm_follow_page": this forces us to uninline
> > various page tables ops.  Can this use follow_page() somehow, or do we
> > need an "__follow_page" export for this case?
> 
> Not if avoidable.  And it seems avoidable as drm really should be using
> vmalloc_to_page.  Untested patch below:

Thanks Christoph, that patch looks great to me!  I didn't know about
vmalloc_to_page...

Want to produce a signed-off version?

Thanks,
Rusty.



^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-07  1:09           ` Rusty Russell
@ 2007-01-07 14:07             ` Zachary Amsden
  0 siblings, 0 replies; 23+ messages in thread
From: Zachary Amsden @ 2007-01-07 14:07 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Ingo Molnar, Jeremy Fitzhardinge, Chris Wright, Andrew Morton,
	linux-kernel, Arjan van de Ven, Adrian Bunk

Rusty Russell wrote:
> On Sat, 2007-01-06 at 12:55 -0800, Zachary Amsden wrote:
>   
>> Rusty Russell wrote:
>>     
>>> +int paravirt_write_msr(unsigned int msr, u64 val);
>>>       
>> If binary modules using debug registers makes us nervous, the 
>> reprogramming MSRs is also similarly bad.
>>     
>
> Yes, but this is simply from experience.  Several modules wrote msrs
> (you can take out the EXPORT_SYMBOL and find them quite quickly).
>   

Several in tree, GPL'd modules did so.  I'm not aware of out of tree 
modules that do that, and if they do, they are misbehaving.  
Reprogramming MTRR memory regions under the kernel's nose is not a 
proper way to behave, and this is the most benign use I can think of for 
write access to MSRs.  If this really breaks any code out there, then 
there should be a proper, controlled API to do this so the kernel can 
manage it.

>>> +void raw_safe_halt(void);
>>> +void halt(void);
>>>       
>> These shouldn't be done by modules, ever.  Only the scheduler should 
>> decide to halt.
>>     
>
> Several modules implement alternate halt loops.  I guess being in a
> module for specific CPUs makes sense...
>   

Yes, but halting is a behavior that can easily introduce critical, grind 
to a halt problems because of race conditions.  I have no problems 
having modules implement alternative halt loops.  I think there is a 
serious debuggability issue with binary modules invoking halt directly.

Zach

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-06 19:31         ` Christoph Hellwig
  2007-01-07  5:35           ` Rusty Russell
@ 2007-01-07 18:39           ` Christoph Hellwig
  2007-01-07 18:43             ` Ingo Molnar
  2007-01-08  0:54             ` Dave Airlie
  1 sibling, 2 replies; 23+ messages in thread
From: Christoph Hellwig @ 2007-01-07 18:39 UTC (permalink / raw)
  To: Christoph Hellwig, Rusty Russell, Ingo Molnar, Zachary Amsden,
	Jeremy Fitzhardinge, Chris Wright, Andrew Morton, linux-kernel,
	Arjan van de Ven, Adrian Bunk, airlied

[-- Attachment #1: Type: text/plain, Size: 978 bytes --]

On Sat, Jan 06, 2007 at 07:31:52PM +0000, Christoph Hellwig wrote:
> On Sun, Jan 07, 2007 at 03:18:45AM +1100, Rusty Russell wrote:
> > PS.  drm_memory.h has a "drm_follow_page": this forces us to uninline
> > various page tables ops.  Can this use follow_page() somehow, or do we
> > need an "__follow_page" export for this case?
> 
> Not if avoidable.  And it seems avoidable as drm really should be using
> vmalloc_to_page.  Untested patch below:

Even better we can actualy avid most of the page table walks completely.
First there is a number of places that can never have the vmalloc case
an may use ioremap/iounmap directly.  Secondly drm_core_ioremap/
drm_core_ioremapfree already have the right drm_map to check wich kind
of mapping we have - we just need to use it instead of discarding that
information!  The only leaves direct drm_ioremapfree in i810_dma.c and
i830_dma.c which I don't quite manage to follow.  Maybe Dave has an
idea how to get rid of those aswell.

[-- Attachment #2: drm-avoid-drm_ioremap --]
[-- Type: text/plain, Size: 1987 bytes --]

Index: linux-2.6/drivers/char/drm/drm_bufs.c
===================================================================
--- linux-2.6.orig/drivers/char/drm/drm_bufs.c	2007-01-07 14:07:18.000000000 +0100
+++ linux-2.6/drivers/char/drm/drm_bufs.c	2007-01-07 14:09:40.000000000 +0100
@@ -178,7 +178,7 @@
 			}
 		}
 		if (map->type == _DRM_REGISTERS)
-			map->handle = drm_ioremap(map->offset, map->size, dev);
+			map->handle = ioremap(map->offset, map->size);
 		break;
 
 	case _DRM_SHM:
@@ -238,7 +238,7 @@
 	list = drm_alloc(sizeof(*list), DRM_MEM_MAPS);
 	if (!list) {
 		if (map->type == _DRM_REGISTERS)
-			drm_ioremapfree(map->handle, map->size, dev);
+			iounmap(map->handle);
 		drm_free(map, sizeof(*map), DRM_MEM_MAPS);
 		return -EINVAL;
 	}
@@ -255,7 +255,7 @@
 	ret = drm_map_handle(dev, &list->hash, user_token, 0);
 	if (ret) {
 		if (map->type == _DRM_REGISTERS)
-			drm_ioremapfree(map->handle, map->size, dev);
+			iounmap(map->handle);
 		drm_free(map, sizeof(*map), DRM_MEM_MAPS);
 		drm_free(list, sizeof(*list), DRM_MEM_MAPS);
 		mutex_unlock(&dev->struct_mutex);
@@ -362,7 +362,7 @@
 
 	switch (map->type) {
 	case _DRM_REGISTERS:
-		drm_ioremapfree(map->handle, map->size, dev);
+		iounmap(map->handle);
 		/* FALLTHROUGH */
 	case _DRM_FRAME_BUFFER:
 		if (drm_core_has_MTRR(dev) && map->mtrr >= 0) {
Index: linux-2.6/drivers/char/drm/drm_vm.c
===================================================================
--- linux-2.6.orig/drivers/char/drm/drm_vm.c	2007-01-07 14:07:18.000000000 +0100
+++ linux-2.6/drivers/char/drm/drm_vm.c	2007-01-07 14:10:50.000000000 +0100
@@ -227,7 +227,12 @@
 							   map->size);
 					DRM_DEBUG("mtrr_del = %d\n", retcode);
 				}
-				drm_ioremapfree(map->handle, map->size, dev);
+				/*
+				 * XXX(hch) autmatic mapping/unmapping only happens for
+				 * _DRM_REGISTERS in all other places.  Should we have
+				 * this check here aswell?
+				 */
+				iounmap(map->handle);
 				break;
 			case _DRM_SHM:
 				vfree(map->handle);

[-- Attachment #3: simplify-drm_core_ioremap --]
[-- Type: text/plain, Size: 2816 bytes --]

Index: linux-2.6/drivers/char/drm/drmP.h
===================================================================
--- linux-2.6.orig/drivers/char/drm/drmP.h	2007-01-07 14:13:48.000000000 +0100
+++ linux-2.6/drivers/char/drm/drmP.h	2007-01-07 14:15:32.000000000 +0100
@@ -1059,27 +1059,8 @@
 extern int drm_mm_init(drm_mm_t *mm, unsigned long start, unsigned long size);
 extern void drm_mm_takedown(drm_mm_t *mm);
 
-/* Inline replacements for DRM_IOREMAP macros */
-static __inline__ void drm_core_ioremap(struct drm_map *map,
-					struct drm_device *dev)
-{
-	map->handle = drm_ioremap(map->offset, map->size, dev);
-}
-
-#if 0
-static __inline__ void drm_core_ioremap_nocache(struct drm_map *map,
-						struct drm_device *dev)
-{
-	map->handle = drm_ioremap_nocache(map->offset, map->size, dev);
-}
-#endif  /*  0  */
-
-static __inline__ void drm_core_ioremapfree(struct drm_map *map,
-					    struct drm_device *dev)
-{
-	if (map->handle && map->size)
-		drm_ioremapfree(map->handle, map->size, dev);
-}
+extern void drm_core_ioremap(struct drm_map *map, struct drm_device *dev);
+extern void drm_core_ioremapfree(struct drm_map *map, struct drm_device *dev);
 
 static __inline__ struct drm_map *drm_core_findmap(struct drm_device *dev,
 						   unsigned int token)
Index: linux-2.6/drivers/char/drm/drm_memory.c
===================================================================
--- linux-2.6.orig/drivers/char/drm/drm_memory.c	2007-01-07 14:13:48.000000000 +0100
+++ linux-2.6/drivers/char/drm/drm_memory.c	2007-01-07 14:19:34.000000000 +0100
@@ -197,20 +197,6 @@
 }
 EXPORT_SYMBOL(drm_ioremap);
 
-#if 0
-void *drm_ioremap_nocache(unsigned long offset,
-					unsigned long size, drm_device_t * dev)
-{
-	if (drm_core_has_AGP(dev) && dev->agp && dev->agp->cant_use_aperture) {
-		drm_map_t *map = drm_lookup_map(offset, size, dev);
-
-		if (map && map->type == _DRM_AGP)
-			return agp_remap(offset, size, dev);
-	}
-	return ioremap_nocache(offset, size);
-}
-#endif  /*  0  */
-
 void drm_ioremapfree(void *pt, unsigned long size,
 				   drm_device_t * dev)
 {
@@ -238,3 +224,26 @@
 EXPORT_SYMBOL(drm_ioremapfree);
 
 #endif				/* debug_memory */
+
+void drm_core_ioremap(struct drm_map *map, struct drm_device *dev)
+{
+	if (drm_core_has_AGP(dev) &&
+	    dev->agp && dev->agp->cant_use_aperture && map->type == _DRM_AGP)
+		map->handle = agp_remap(map->offset, map->size, dev);
+	else
+		map->handle = ioremap(map->offset, map->size);
+}
+EXPORT_SYMBOL_GPL(drm_core_ioremap);
+
+void drm_core_ioremapfree(struct drm_map *map, struct drm_device *dev)
+{
+	if (!map->handle || !map->size)
+		return;
+
+	if (drm_core_has_AGP(dev) &&
+	    dev->agp && dev->agp->cant_use_aperture && map->type == _DRM_AGP)
+		vunmap(map->handle);
+	else
+		iounmap(map->handle);
+}
+EXPORT_SYMBOL_GPL(drm_core_ioremapfree);

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-07 18:39           ` Christoph Hellwig
@ 2007-01-07 18:43             ` Ingo Molnar
  2007-01-08  0:54             ` Dave Airlie
  1 sibling, 0 replies; 23+ messages in thread
From: Ingo Molnar @ 2007-01-07 18:43 UTC (permalink / raw)
  To: Christoph Hellwig, Rusty Russell, Zachary Amsden,
	Jeremy Fitzhardinge, Chris Wright, Andrew Morton, linux-kernel,
	Arjan van de Ven, Adrian Bunk, airlied


* Christoph Hellwig <hch@infradead.org> wrote:

> On Sat, Jan 06, 2007 at 07:31:52PM +0000, Christoph Hellwig wrote:
> > On Sun, Jan 07, 2007 at 03:18:45AM +1100, Rusty Russell wrote:
> > > PS.  drm_memory.h has a "drm_follow_page": this forces us to uninline
> > > various page tables ops.  Can this use follow_page() somehow, or do we
> > > need an "__follow_page" export for this case?
> >
> > Not if avoidable.  And it seems avoidable as drm really should be using
> > vmalloc_to_page.  Untested patch below:
>
> Even better we can actualy avid most of the page table walks 
> completely.

agreed. I think there's an important side-observation here as well: 
having inlined functions uninlined and exported puts them under a lot 
more scrutiny. Hence individual exports instead of the global 
paravirt_ops export is a big plus.

        Ingo


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [patch] paravirt: isolate module ops
  2007-01-07 18:39           ` Christoph Hellwig
  2007-01-07 18:43             ` Ingo Molnar
@ 2007-01-08  0:54             ` Dave Airlie
  1 sibling, 0 replies; 23+ messages in thread
From: Dave Airlie @ 2007-01-08  0:54 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Rusty Russell, Ingo Molnar, Zachary Amsden, Jeremy Fitzhardinge,
	Chris Wright, Andrew Morton, linux-kernel, Arjan van de Ven,
	Adrian Bunk


> Even better we can actualy avid most of the page table walks completely.
> First there is a number of places that can never have the vmalloc case
> an may use ioremap/iounmap directly.  Secondly drm_core_ioremap/
> drm_core_ioremapfree already have the right drm_map to check wich kind
> of mapping we have - we just need to use it instead of discarding that
> information!  The only leaves direct drm_ioremapfree in i810_dma.c and
> i830_dma.c which I don't quite manage to follow.  Maybe Dave has an
> idea how to get rid of those aswell.
>

I've applied two patches to the DRM git
http://gitweb.freedesktop.org/?p=mesa/drm.git;a=summary

They need a fair bit of testing, I've tested them no i810, but I need to 
test them on a few other boards before I'd consider putting them in -mm..

Dave.

-- 
David Airlie, Software Engineer
http://www.skynet.ie/~airlied / airlied at skynet.ie
Linux kernel - DRI, VAX / pam_smb / ILUG


^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2007-01-08  1:10 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-01-06  0:07 [patch] paravirt: isolate module ops Ingo Molnar
2007-01-06  0:31 ` Zachary Amsden
2007-01-06  6:25   ` Rusty Russell
2007-01-06  7:08     ` Ingo Molnar
2007-01-06  7:12       ` Ingo Molnar
2007-01-06 17:42       ` Rusty Russell
2007-01-06 18:32         ` Ingo Molnar
2007-01-06 20:55         ` Zachary Amsden
2007-01-07  1:09           ` Rusty Russell
2007-01-07 14:07             ` Zachary Amsden
2007-01-06  7:14     ` Ingo Molnar
2007-01-06  9:50       ` Zachary Amsden
2007-01-06 10:52         ` Arjan van de Ven
2007-01-06 16:18       ` Rusty Russell
2007-01-06 19:31         ` Christoph Hellwig
2007-01-07  5:35           ` Rusty Russell
2007-01-07 18:39           ` Christoph Hellwig
2007-01-07 18:43             ` Ingo Molnar
2007-01-08  0:54             ` Dave Airlie
2007-01-06  1:02 ` Zachary Amsden
2007-01-06  2:07   ` Arjan van de Ven
2007-01-06  4:41     ` Zachary Amsden
2007-01-06  5:34     ` Rusty Russell

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).