From: Zachary Amsden <zach@vmware.com>
To: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@osdl.org>,
linux-kernel@vger.kernel.org,
Rusty Russell <rusty@rustcorp.com.au>,
Arjan van de Ven <arjan@infradead.org>,
Adrian Bunk <bunk@stusta.de>
Subject: Re: [patch] paravirt: isolate module ops
Date: Fri, 05 Jan 2007 17:02:47 -0800 [thread overview]
Message-ID: <459EF537.6090301@vmware.com> (raw)
In-Reply-To: <20070106000715.GA6688@elte.hu>
[-- Attachment #1: Type: text/plain, Size: 908 bytes --]
Ingo Molnar wrote:
> Subject: [patch] paravirt: isolate module ops
> From: Ingo Molnar <mingo@elte.hu>
>
> only export those operations to modules that have been available to them
> historically: irq disable/enable, io-delay, udelay, etc.
>
> this isolates that functionality from other paravirtualization
> functionality that modules have no business messing with.
>
> boot and build tested with CONFIG_PARAVIRT=y.
>
I would suggest a slightly different carving. For one, no TLB flushes.
If you can't modify PTEs, why do you need to have TLB flushes? And I
would allow CR0 read / write for code which saves and restores FPU state
- possibly even debug register access, although any code which touches
DRs could be doing something sneaky. I'm on the fence about that one.
Here is a partially tested patch against the -mm tree. Let me know what
you think of this slightly different approach.
[-- Attachment #2: ingo-isolation --]
[-- Type: text/plain, Size: 14420 bytes --]
diff -r 3242f865ce8e arch/i386/kernel/paravirt.c
--- a/arch/i386/kernel/paravirt.c Thu Jan 04 20:17:02 2007 -0800
+++ b/arch/i386/kernel/paravirt.c Fri Jan 05 16:48:25 2007 -0800
@@ -492,6 +492,7 @@ struct paravirt_ops paravirt_ops = {
.patch = native_patch,
.banner = default_banner,
+
.arch_setup = native_nop,
.memory_setup = machine_specific_memory_setup,
.get_wallclock = native_get_wallclock,
@@ -577,4 +578,42 @@ struct paravirt_ops paravirt_ops = {
.startup_ipi_hook = (void *)native_nop,
};
-EXPORT_SYMBOL(paravirt_ops);
+
+/*
+ * These are exported to modules:
+ */
+struct paravirt_mod_ops paravirt_mod_ops = {
+ .name = "bare hardware",
+ .paravirt_enabled = 0,
+ .kernel_rpl = 0,
+
+ .patch = native_patch,
+ .banner = default_banner,
+
+ .save_fl = native_save_fl,
+ .restore_fl = native_restore_fl,
+ .irq_disable = native_irq_disable,
+ .irq_enable = native_irq_enable,
+
+ .cpuid = native_cpuid,
+
+ .read_msr = native_read_msr,
+ .write_msr = native_write_msr,
+
+ .read_tsc = native_read_tsc,
+ .read_pmc = native_read_pmc,
+
+ .io_delay = native_io_delay,
+ .const_udelay = __const_udelay,
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ .apic_write = native_apic_write,
+ .apic_write_atomic = native_apic_write_atomic,
+ .apic_read = native_apic_read,
+#endif
+
+ .flush_tlb_user = native_flush_tlb,
+ .flush_tlb_kernel = native_flush_tlb_global,
+ .flush_tlb_single = native_flush_tlb_single,
+};
+EXPORT_SYMBOL(paravirt_mod_ops);
diff -r 3242f865ce8e include/asm-i386/delay.h
--- a/include/asm-i386/delay.h Thu Jan 04 20:17:02 2007 -0800
+++ b/include/asm-i386/delay.h Fri Jan 05 16:11:43 2007 -0800
@@ -17,9 +17,9 @@ extern void __delay(unsigned long loops)
extern void __delay(unsigned long loops);
#if defined(CONFIG_PARAVIRT) && !defined(USE_REAL_TIME_DELAY)
-#define udelay(n) paravirt_ops.const_udelay((n) * 0x10c7ul)
+#define udelay(n) paravirt_mod_ops.const_udelay((n) * 0x10c7ul)
-#define ndelay(n) paravirt_ops.const_udelay((n) * 5ul)
+#define ndelay(n) paravirt_mod_ops.const_udelay((n) * 5ul)
#else /* !PARAVIRT || USE_REAL_TIME_DELAY */
diff -r 3242f865ce8e include/asm-i386/paravirt.h
--- a/include/asm-i386/paravirt.h Thu Jan 04 20:17:02 2007 -0800
+++ b/include/asm-i386/paravirt.h Fri Jan 05 16:51:10 2007 -0800
@@ -29,12 +29,52 @@ struct Xgt_desc_struct;
struct Xgt_desc_struct;
struct tss_struct;
struct mm_struct;
-struct paravirt_ops
+struct paravirt_mod_ops
{
unsigned int kernel_rpl;
int paravirt_enabled;
const char *name;
+ /* All the function pointers here are declared as "fastcall"
+ so that we get a specific register-based calling
+ convention. This makes it easier to implement inline
+ assembler replacements. */
+
+ void (fastcall *cpuid)(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx);
+
+ /* CLTS / cr0 may be used for math state save / restore */
+ void (fastcall *clts)(void);
+ unsigned long (fastcall *read_cr0)(void);
+ void (fastcall *write_cr0)(unsigned long);
+
+ unsigned long (fastcall *save_fl)(void);
+ void (fastcall *restore_fl)(unsigned long);
+ void (fastcall *irq_disable)(void);
+ void (fastcall *irq_enable)(void);
+
+ /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
+ u64 (fastcall *read_msr)(unsigned int msr, int *err);
+ int (fastcall *write_msr)(unsigned int msr, u64 val);
+
+ u64 (fastcall *read_tsc)(void);
+ u64 (fastcall *read_pmc)(void);
+
+ void (fastcall *io_delay)(void);
+ void (*const_udelay)(unsigned long loops);
+
+ /* May be needed by device drivers to flush cache */
+ void (fastcall *wbinvd)(void);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ void (fastcall *apic_write)(unsigned long reg, unsigned long v);
+ void (fastcall *apic_write_atomic)(unsigned long reg, unsigned long v);
+ unsigned long (fastcall *apic_read)(unsigned long reg);
+#endif
+};
+
+struct paravirt_ops
+{
/*
* Patch may replace one of the defined code sequences with arbitrary
* code, subject to the same register constraints. This generally
@@ -54,21 +94,8 @@ struct paravirt_ops
int (*set_wallclock)(unsigned long);
void (*time_init)(void);
- /* All the function pointers here are declared as "fastcall"
- so that we get a specific register-based calling
- convention. This makes it easier to implement inline
- assembler replacements. */
-
- void (fastcall *cpuid)(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx);
-
unsigned long (fastcall *get_debugreg)(int regno);
void (fastcall *set_debugreg)(int regno, unsigned long value);
-
- void (fastcall *clts)(void);
-
- unsigned long (fastcall *read_cr0)(void);
- void (fastcall *write_cr0)(unsigned long);
unsigned long (fastcall *read_cr2)(void);
void (fastcall *write_cr2)(unsigned long);
@@ -80,20 +107,8 @@ struct paravirt_ops
unsigned long (fastcall *read_cr4)(void);
void (fastcall *write_cr4)(unsigned long);
- unsigned long (fastcall *save_fl)(void);
- void (fastcall *restore_fl)(unsigned long);
- void (fastcall *irq_disable)(void);
- void (fastcall *irq_enable)(void);
void (fastcall *safe_halt)(void);
void (fastcall *halt)(void);
- void (fastcall *wbinvd)(void);
-
- /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
- u64 (fastcall *read_msr)(unsigned int msr, int *err);
- int (fastcall *write_msr)(unsigned int msr, u64 val);
-
- u64 (fastcall *read_tsc)(void);
- u64 (fastcall *read_pmc)(void);
void (fastcall *load_tr_desc)(void);
void (fastcall *load_gdt)(const struct Xgt_desc_struct *);
@@ -114,13 +129,7 @@ struct paravirt_ops
void (fastcall *set_iopl_mask)(unsigned mask);
- void (fastcall *io_delay)(void);
- void (*const_udelay)(unsigned long loops);
-
#ifdef CONFIG_X86_LOCAL_APIC
- void (fastcall *apic_write)(unsigned long reg, unsigned long v);
- void (fastcall *apic_write_atomic)(unsigned long reg, unsigned long v);
- unsigned long (fastcall *apic_read)(unsigned long reg);
void (*setup_boot_clock)(void);
void (*setup_secondary_clock)(void);
#endif
@@ -163,8 +172,9 @@ struct paravirt_ops
__attribute__((__section__(".paravirtprobe"))) = fn
extern struct paravirt_ops paravirt_ops;
-
-#define paravirt_enabled() (paravirt_ops.paravirt_enabled)
+extern struct paravirt_mod_ops paravirt_mod_ops;
+
+#define paravirt_enabled() (paravirt_mod_ops.paravirt_enabled)
static inline void load_esp0(struct tss_struct *tss,
struct thread_struct *thread)
@@ -192,7 +202,7 @@ static inline void __cpuid(unsigned int
static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
- paravirt_ops.cpuid(eax, ebx, ecx, edx);
+ paravirt_mod_ops.cpuid(eax, ebx, ecx, edx);
}
/*
@@ -201,10 +211,10 @@ static inline void __cpuid(unsigned int
#define get_debugreg(var, reg) var = paravirt_ops.get_debugreg(reg)
#define set_debugreg(val, reg) paravirt_ops.set_debugreg(reg, val)
-#define clts() paravirt_ops.clts()
-
-#define read_cr0() paravirt_ops.read_cr0()
-#define write_cr0(x) paravirt_ops.write_cr0(x)
+#define clts() paravirt_mod_ops.clts()
+
+#define read_cr0() paravirt_mod_ops.read_cr0()
+#define write_cr0(x) paravirt_mod_ops.write_cr0(x)
#define read_cr2() paravirt_ops.read_cr2()
#define write_cr2(x) paravirt_ops.write_cr2(x)
@@ -225,58 +235,58 @@ static inline void halt(void)
{
paravirt_ops.safe_halt();
}
-#define wbinvd() paravirt_ops.wbinvd()
-
-#define get_kernel_rpl() (paravirt_ops.kernel_rpl)
+#define wbinvd() paravirt_mod_ops.wbinvd()
+
+#define get_kernel_rpl() (paravirt_mod_ops.kernel_rpl)
#define rdmsr(msr,val1,val2) do { \
int _err; \
- u64 _l = paravirt_ops.read_msr(msr,&_err); \
+ u64 _l = paravirt_mod_ops.read_msr(msr,&_err); \
val1 = (u32)_l; \
val2 = _l >> 32; \
} while(0)
#define wrmsr(msr,val1,val2) do { \
u64 _l = ((u64)(val2) << 32) | (val1); \
- paravirt_ops.write_msr((msr), _l); \
+ paravirt_mod_ops.write_msr((msr), _l); \
} while(0)
#define rdmsrl(msr,val) do { \
int _err; \
- val = paravirt_ops.read_msr((msr),&_err); \
-} while(0)
-
-#define wrmsrl(msr,val) (paravirt_ops.write_msr((msr),(val)))
+ val = paravirt_mod_ops.read_msr((msr),&_err); \
+} while(0)
+
+#define wrmsrl(msr,val) (paravirt_mod_ops.write_msr((msr),(val)))
#define wrmsr_safe(msr,a,b) ({ \
u64 _l = ((u64)(b) << 32) | (a); \
- paravirt_ops.write_msr((msr),_l); \
+ paravirt_mod_ops.write_msr((msr),_l); \
})
/* rdmsr with exception handling */
#define rdmsr_safe(msr,a,b) ({ \
int _err; \
- u64 _l = paravirt_ops.read_msr(msr,&_err); \
+ u64 _l = paravirt_mod_ops.read_msr(msr,&_err); \
(*a) = (u32)_l; \
(*b) = _l >> 32; \
_err; })
#define rdtsc(low,high) do { \
- u64 _l = paravirt_ops.read_tsc(); \
+ u64 _l = paravirt_mod_ops.read_tsc(); \
low = (u32)_l; \
high = _l >> 32; \
} while(0)
#define rdtscl(low) do { \
- u64 _l = paravirt_ops.read_tsc(); \
+ u64 _l = paravirt_mod_ops.read_tsc(); \
low = (int)_l; \
} while(0)
-#define rdtscll(val) (val = paravirt_ops.read_tsc())
+#define rdtscll(val) (val = paravirt_mod_ops.read_tsc())
#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
#define rdpmc(counter,low,high) do { \
- u64 _l = paravirt_ops.read_pmc(); \
+ u64 _l = paravirt_mod_ops.read_pmc(); \
low = (u32)_l; \
high = _l >> 32; \
} while(0)
@@ -299,11 +309,11 @@ static inline void halt(void)
/* The paravirtualized I/O functions */
static inline void slow_down_io(void) {
- paravirt_ops.io_delay();
+ paravirt_mod_ops.io_delay();
#ifdef REALLY_SLOW_IO
- paravirt_ops.io_delay();
- paravirt_ops.io_delay();
- paravirt_ops.io_delay();
+ paravirt_mod_ops.io_delay();
+ paravirt_mod_ops.io_delay();
+ paravirt_mod_ops.io_delay();
#endif
}
@@ -313,17 +323,17 @@ static inline void slow_down_io(void) {
*/
static inline void apic_write(unsigned long reg, unsigned long v)
{
- paravirt_ops.apic_write(reg,v);
+ paravirt_mod_ops.apic_write(reg,v);
}
static inline void apic_write_atomic(unsigned long reg, unsigned long v)
{
- paravirt_ops.apic_write_atomic(reg,v);
+ paravirt_mod_ops.apic_write_atomic(reg,v);
}
static inline unsigned long apic_read(unsigned long reg)
{
- return paravirt_ops.apic_read(reg);
+ return paravirt_mod_ops.apic_read(reg);
}
static inline void setup_boot_clock(void)
@@ -447,7 +457,7 @@ static inline unsigned long __raw_local_
"call *%1;"
"popl %%edx; popl %%ecx",
PARAVIRT_SAVE_FLAGS, CLBR_NONE)
- : "=a"(f): "m"(paravirt_ops.save_fl)
+ : "=a"(f): "m"(paravirt_mod_ops.save_fl)
: "memory", "cc");
return f;
}
@@ -458,7 +468,7 @@ static inline void raw_local_irq_restore
"call *%1;"
"popl %%edx; popl %%ecx",
PARAVIRT_RESTORE_FLAGS, CLBR_EAX)
- : "=a"(f) : "m" (paravirt_ops.restore_fl), "0"(f)
+ : "=a"(f) : "m" (paravirt_mod_ops.restore_fl), "0"(f)
: "memory", "cc");
}
@@ -468,7 +478,7 @@ static inline void raw_local_irq_disable
"call *%0;"
"popl %%edx; popl %%ecx",
PARAVIRT_IRQ_DISABLE, CLBR_EAX)
- : : "m" (paravirt_ops.irq_disable)
+ : : "m" (paravirt_mod_ops.irq_disable)
: "memory", "eax", "cc");
}
@@ -478,7 +488,7 @@ static inline void raw_local_irq_enable(
"call *%0;"
"popl %%edx; popl %%ecx",
PARAVIRT_IRQ_ENABLE, CLBR_EAX)
- : : "m" (paravirt_ops.irq_enable)
+ : : "m" (paravirt_mod_ops.irq_enable)
: "memory", "eax", "cc");
}
@@ -493,26 +503,26 @@ static inline unsigned long __raw_local_
PARAVIRT_SAVE_FLAGS_IRQ_DISABLE,
CLBR_NONE)
: "=a"(f)
- : "m" (paravirt_ops.save_fl),
- "m" (paravirt_ops.irq_disable)
+ : "m" (paravirt_mod_ops.save_fl),
+ "m" (paravirt_mod_ops.irq_disable)
: "memory", "cc");
return f;
}
#define CLI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \
- "call *paravirt_ops+%c[irq_disable];" \
+ "call *paravirt_mod_ops+%c[irq_disable];" \
"popl %%edx; popl %%ecx", \
PARAVIRT_IRQ_DISABLE, CLBR_EAX)
#define STI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \
- "call *paravirt_ops+%c[irq_enable];" \
+ "call *paravirt_mod_ops+%c[irq_enable];" \
"popl %%edx; popl %%ecx", \
PARAVIRT_IRQ_ENABLE, CLBR_EAX)
#define CLI_STI_CLOBBERS , "%eax"
#define CLI_STI_INPUT_ARGS \
, \
- [irq_disable] "i" (offsetof(struct paravirt_ops, irq_disable)), \
- [irq_enable] "i" (offsetof(struct paravirt_ops, irq_enable))
+ [irq_disable] "i" (offsetof(struct paravirt_mod_ops, irq_disable)), \
+ [irq_enable] "i" (offsetof(struct paravirt_mod_ops, irq_enable))
#else /* __ASSEMBLY__ */
@@ -534,13 +544,13 @@ 772:; \
#define DISABLE_INTERRUPTS(clobbers) \
PARA_PATCH(PARAVIRT_IRQ_DISABLE, clobbers, \
pushl %ecx; pushl %edx; \
- call *paravirt_ops+PARAVIRT_irq_disable; \
+ call *paravirt_mod_ops+PARAVIRT_irq_disable; \
popl %edx; popl %ecx) \
#define ENABLE_INTERRUPTS(clobbers) \
PARA_PATCH(PARAVIRT_IRQ_ENABLE, clobbers, \
pushl %ecx; pushl %edx; \
- call *%cs:paravirt_ops+PARAVIRT_irq_enable; \
+ call *%cs:paravirt_mod_ops+PARAVIRT_irq_enable; \
popl %edx; popl %ecx)
#define ENABLE_INTERRUPTS_SYSEXIT \
@@ -548,7 +558,7 @@ 772:; \
jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit)
#define GET_CR0_INTO_EAX \
- call *paravirt_ops+PARAVIRT_read_cr0
+ call *paravirt_mod_ops+PARAVIRT_read_cr0
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_PARAVIRT */
diff -r 3242f865ce8e arch/i386/kernel/asm-offsets.c
--- a/arch/i386/kernel/asm-offsets.c Thu Jan 04 20:17:02 2007 -0800
+++ b/arch/i386/kernel/asm-offsets.c Fri Jan 05 16:49:15 2007 -0800
@@ -104,11 +104,11 @@ void foo(void)
#ifdef CONFIG_PARAVIRT
BLANK();
- OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled);
- OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable);
- OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable);
+ OFFSET(PARAVIRT_enabled, paravirt_mod_ops, paravirt_enabled);
+ OFFSET(PARAVIRT_irq_disable, paravirt_mod_ops, irq_disable);
+ OFFSET(PARAVIRT_irq_enable, paravirt_mod_ops, irq_enable);
OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit);
OFFSET(PARAVIRT_iret, paravirt_ops, iret);
- OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0);
+ OFFSET(PARAVIRT_read_cr0, paravirt_mod_ops, read_cr0);
#endif
}
next prev parent reply other threads:[~2007-01-06 1:02 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-01-06 0:07 [patch] paravirt: isolate module ops Ingo Molnar
2007-01-06 0:31 ` Zachary Amsden
2007-01-06 6:25 ` Rusty Russell
2007-01-06 7:08 ` Ingo Molnar
2007-01-06 7:12 ` Ingo Molnar
2007-01-06 17:42 ` Rusty Russell
2007-01-06 18:32 ` Ingo Molnar
2007-01-06 20:55 ` Zachary Amsden
2007-01-07 1:09 ` Rusty Russell
2007-01-07 14:07 ` Zachary Amsden
2007-01-06 7:14 ` Ingo Molnar
2007-01-06 9:50 ` Zachary Amsden
2007-01-06 10:52 ` Arjan van de Ven
2007-01-06 16:18 ` Rusty Russell
2007-01-06 19:31 ` Christoph Hellwig
2007-01-07 5:35 ` Rusty Russell
2007-01-07 18:39 ` Christoph Hellwig
2007-01-07 18:43 ` Ingo Molnar
2007-01-08 0:54 ` Dave Airlie
2007-01-06 1:02 ` Zachary Amsden [this message]
2007-01-06 2:07 ` Arjan van de Ven
2007-01-06 4:41 ` Zachary Amsden
2007-01-06 5:34 ` Rusty Russell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=459EF537.6090301@vmware.com \
--to=zach@vmware.com \
--cc=akpm@osdl.org \
--cc=arjan@infradead.org \
--cc=bunk@stusta.de \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=rusty@rustcorp.com.au \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).